1/*
2******************************************************************************
3*
4*   Copyright (C) 1997-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10*
11*   Date        Name        Description
12*   04/14/97    aliu        Creation.
13*   04/24/97    aliu        Added getDefaultDataDirectory() and
14*                            getDefaultLocaleID().
15*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
16*                            for assumed case.  Non-UNIX platforms must be
17*                            special-cased.  Rewrote numeric methods dealing
18*                            with NaN and Infinity to be platform independent
19*                             over all IEEE 754 platforms.
20*   05/13/97    aliu        Restored sign of timezone
21*                            (semantics are hours West of GMT)
22*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23*                             nextDouble..
24*   07/22/98    stephen     Added remainder, max, min, trunc
25*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
26*   08/24/98    stephen     Added longBitsFromDouble
27*   09/08/98    stephen     Minor changes for Mac Port
28*   03/02/99    stephen     Removed openFile().  Added AS400 support.
29*                            Fixed EBCDIC tables
30*   04/15/99    stephen     Converted to C.
31*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
32*   08/04/99    jeffrey R.  Added OS/2 changes
33*   11/15/99    helena      Integrated S/390 IEEE support.
34*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
35*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
36*   01/03/08    Steven L.   Fake Time Support
37******************************************************************************
38*/
39
40/* Define _XOPEN_SOURCE for access to POSIX functions. */
41#ifdef _XOPEN_SOURCE
42    /* Use the predefined value. */
43#else
44    /*
45     * Version 6.0:
46     * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition)
47     * also known as
48     * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03)
49     */
50#   define _XOPEN_SOURCE 600
51#endif
52
53/* Make sure things like readlink and such functions work.
54Poorly upgraded Solaris machines can't have this defined.
55Cleanly installed Solaris can use this #define.
56*/
57#if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
58#define _XOPEN_SOURCE_EXTENDED 1
59#endif
60
61/* include ICU headers */
62#include "unicode/utypes.h"
63#include "unicode/putil.h"
64#include "unicode/ustring.h"
65#include "putilimp.h"
66#include "uassert.h"
67#include "umutex.h"
68#include "cmemory.h"
69#include "cstring.h"
70#include "locmap.h"
71#include "ucln_cmn.h"
72
73/* Include standard headers. */
74#include <stdio.h>
75#include <stdlib.h>
76#include <string.h>
77#include <math.h>
78#include <locale.h>
79#include <float.h>
80#include <time.h>
81
82/* include system headers */
83#ifdef U_WINDOWS
84#   define WIN32_LEAN_AND_MEAN
85#   define VC_EXTRALEAN
86#   define NOUSER
87#   define NOSERVICE
88#   define NOIME
89#   define NOMCX
90#   include <windows.h>
91#   include "wintz.h"
92#elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
93/* tzset isn't defined in strict ANSI on Cygwin. */
94#   undef __STRICT_ANSI__
95#elif defined(OS400)
96#   include <float.h>
97#   include <qusec.h>       /* error code structure */
98#   include <qusrjobi.h>
99#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
100#   include <mih/testptr.h> /* For uprv_maximumPtr */
101#elif defined(XP_MAC)
102#   include <Files.h>
103#   include <IntlResources.h>
104#   include <Script.h>
105#   include <Folders.h>
106#   include <MacTypes.h>
107#   include <TextUtils.h>
108#   define ICU_NO_USER_DATA_OVERRIDE 1
109#elif defined(OS390)
110#include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
111#elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
112#include <limits.h>
113#include <unistd.h>
114#elif defined(U_QNX)
115#include <sys/neutrino.h>
116#elif defined(U_SOLARIS)
117# ifndef _XPG4_2
118#  define _XPG4_2
119# endif
120#endif
121
122
123#if defined(U_DARWIN)
124#include <TargetConditionals.h>
125#endif
126
127#ifndef U_WINDOWS
128#include <sys/time.h>
129#endif
130
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
136
137#if U_HAVE_NL_LANGINFO_CODESET
138#include <langinfo.h>
139#endif
140
141/**
142 * Simple things (presence of functions, etc) should just go in configure.in and be added to
143 * icucfg.h via autoheader.
144 */
145#if defined(HAVE_CONFIG_H)
146#include "icucfg.h"
147#endif
148
149/* Define the extension for data files, again... */
150#define DATA_TYPE "dat"
151
152/* Leave this copyright notice here! */
153static const char copyright[] = U_COPYRIGHT_STRING;
154
155/* floating point implementations ------------------------------------------- */
156
/* We return QNAN rather than SNAN*/
/* Mask selecting the top (sign) bit of a 32-bit word; applied below to the
   most significant word of a double. */
#define SIGN 0x80000000U

/* Make it easy to define certain types of constants */
/* Overlays a double with a 64-bit integer so that a double constant can be
   initialized from, or inspected as, its raw bit pattern. */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern returned by uprv_getNaN(): exponent bits all set plus the top mantissa bit. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity: exponent bits all set, mantissa zero. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
167
168/*---------------------------------------------------------------------------
169  Platform utilities
170  Our general strategy is to assume we're on a POSIX platform.  Platforms which
171  are non-POSIX must declare themselves so.  The default POSIX implementation
172  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
173  functions).
174  ---------------------------------------------------------------------------*/
175
176#if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
177#   undef U_POSIX_LOCALE
178#else
179#   define U_POSIX_LOCALE    1
180#endif
181
182/*
183    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
184    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
185*/
186#if !IEEE_754
/*
 * Returns a pointer to the first of the n most significant bytes of *d.
 * On big-endian builds those bytes start at the address of d itself; on
 * little-endian builds they are the last n bytes of the object.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
#if !U_IS_BIG_ENDIAN
    /* One past the end of the double, then step back n bytes. */
    char* pastEnd = (char*)(d + 1);
    return pastEnd - n;
#else
    return (char*)d;
#endif
}
196
/*
 * Returns a pointer to the first of the n least significant bytes of *d.
 * On little-endian builds those bytes start at the address of d itself; on
 * big-endian builds they are the last n bytes of the object.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
#if !U_IS_BIG_ENDIAN
    return (char*)d;
#else
    /* One past the end of the double, then step back n bytes. */
    char* pastEnd = (char*)(d + 1);
    return pastEnd - n;
#endif
}
206#endif   /* !IEEE_754 */
207
208#if IEEE_754
209static UBool
210u_signBit(double d) {
211    uint8_t hiByte;
212#if U_IS_BIG_ENDIAN
213    hiByte = *(uint8_t *)&d;
214#else
215    hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
216#endif
217    return (hiByte & 0x80) != 0;
218}
219#endif
220
221
222
223#if defined (U_DEBUG_FAKETIME)
/* Override the clock to test things without having to move the system clock.
 * Assumes POSIX gettimeofday() will function
 *
 * The state below is filled in once by getUTCtime_fake(), which reads the
 * U_FAKETIME_START environment variable on its first call.
 */
UDate fakeClock_t0 = 0; /** Time to start the clock from **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
/* Held by getUTCtime_fake() around the one-time initialization of the values above. */
static UMTX fakeClockMutex = NULL;
231
232static UDate getUTCtime_real() {
233    struct timeval posixTime;
234    gettimeofday(&posixTime, NULL);
235    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
236}
237
238static UDate getUTCtime_fake() {
239    umtx_lock(&fakeClockMutex);
240    if(!fakeClock_set) {
241        UDate real = getUTCtime_real();
242        const char *fake_start = getenv("U_FAKETIME_START");
243        if((fake_start!=NULL) && (fake_start[0]!=0)) {
244            sscanf(fake_start,"%lf",&fakeClock_t0);
245            fakeClock_dt = fakeClock_t0 - real;
246            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
247                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
248                    fakeClock_t0, fake_start, fakeClock_dt, real);
249        } else {
250          fakeClock_dt = 0;
251            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
252                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
253        }
254        fakeClock_set = TRUE;
255    }
256    umtx_unlock(&fakeClockMutex);
257
258    return getUTCtime_real() + fakeClock_dt;
259}
260#endif
261
262#if defined(U_WINDOWS)
/* Overlays a Windows FILETIME with a 64-bit integer so the value filled in by
   GetSystemTimeAsFileTime() can be used in integer arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
271
272#endif
273
274/*---------------------------------------------------------------------------
275  Universal Implementations
276  These are designed to work on all platforms.  Try these, and if they
277  don't work on your platform, then special case your platform with new
278  implementations.
279---------------------------------------------------------------------------*/
280
281U_CAPI UDate U_EXPORT2
282uprv_getUTCtime()
283{
284#if defined(U_DEBUG_FAKETIME)
285    return getUTCtime_fake(); /* Hook for overriding the clock */
286#else
287    return uprv_getRawUTCtime();
288#endif
289}
290
291/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
292U_CAPI UDate U_EXPORT2
293uprv_getRawUTCtime()
294{
295#if defined(XP_MAC)
296    time_t t, t1, t2;
297    struct tm tmrec;
298
299    uprv_memset( &tmrec, 0, sizeof(tmrec) );
300    tmrec.tm_year = 70;
301    tmrec.tm_mon = 0;
302    tmrec.tm_mday = 1;
303    t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
304
305    time(&t);
306    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
307    t2 = mktime(&tmrec);    /* seconds of current GMT*/
308    return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
309#elif defined(U_WINDOWS)
310
311    FileTimeConversion winTime;
312    GetSystemTimeAsFileTime(&winTime.fileTime);
313    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
314#else
315
316#if defined(HAVE_GETTIMEOFDAY)
317    struct timeval posixTime;
318    gettimeofday(&posixTime, NULL);
319    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
320#else
321    time_t epochtime;
322    time(&epochtime);
323    return (UDate)epochtime * U_MILLIS_PER_SECOND;
324#endif
325
326#endif
327}
328
329/*-----------------------------------------------------------------------------
330  IEEE 754
331  These methods detect and return NaN and infinity values for doubles
332  conforming to IEEE 754.  Platforms which support this standard include X86,
333  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
334  If this doesn't work on your platform, you have non-IEEE floating-point, and
335  will need to code your own versions.  A naive implementation is to return 0.0
336  for getNaN and getInfinity, and false for isNaN and isInfinite.
337  ---------------------------------------------------------------------------*/
338
339U_CAPI UBool U_EXPORT2
340uprv_isNaN(double number)
341{
342#if IEEE_754
343    BitPatternConversion convertedNumber;
344    convertedNumber.d64 = number;
345    /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
346    return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
347
348#elif defined(OS390)
349    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
350                        sizeof(uint32_t));
351    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
352                        sizeof(uint32_t));
353
354    return ((highBits & 0x7F080000L) == 0x7F080000L) &&
355      (lowBits == 0x00000000L);
356
357#else
358    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
359    /* you'll need to replace this default implementation with what's correct*/
360    /* for your platform.*/
361    return number != number;
362#endif
363}
364
365U_CAPI UBool U_EXPORT2
366uprv_isInfinite(double number)
367{
368#if IEEE_754
369    BitPatternConversion convertedNumber;
370    convertedNumber.d64 = number;
371    /* Infinity is exactly 0x7FF0000000000000U. */
372    return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
373#elif defined(OS390)
374    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
375                        sizeof(uint32_t));
376    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
377                        sizeof(uint32_t));
378
379    return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
380
381#else
382    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
383    /* value, you'll need to replace this default implementation with what's*/
384    /* correct for your platform.*/
385    return number == (2.0 * number);
386#endif
387}
388
389U_CAPI UBool U_EXPORT2
390uprv_isPositiveInfinity(double number)
391{
392#if IEEE_754 || defined(OS390)
393    return (UBool)(number > 0 && uprv_isInfinite(number));
394#else
395    return uprv_isInfinite(number);
396#endif
397}
398
399U_CAPI UBool U_EXPORT2
400uprv_isNegativeInfinity(double number)
401{
402#if IEEE_754 || defined(OS390)
403    return (UBool)(number < 0 && uprv_isInfinite(number));
404
405#else
406    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
407                        sizeof(uint32_t));
408    return((highBits & SIGN) && uprv_isInfinite(number));
409
410#endif
411}
412
/*
 * Returns the NaN constant. On IEEE_754 and OS390 builds this is the double
 * view of gNan; on other builds 0.0 is returned as a placeholder.
 */
U_CAPI double U_EXPORT2
uprv_getNaN()
{
#if IEEE_754 || defined(OS390)
    return gNan.d64;
#else
    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    /* you'll need to replace this default implementation with what's correct*/
    /* for your platform.*/
    return 0.0;
#endif
}
425
/*
 * Returns the positive-infinity constant. On IEEE_754 and OS390 builds this
 * is the double view of gInf; on other builds 0.0 is returned as a
 * placeholder.
 */
U_CAPI double U_EXPORT2
uprv_getInfinity()
{
#if IEEE_754 || defined(OS390)
    return gInf.d64;
#else
    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    /* value, you'll need to replace this default implementation with what's*/
    /* correct for your platform.*/
    return 0.0;
#endif
}
438
/* Thin wrapper around the C library floor(). */
U_CAPI double U_EXPORT2
uprv_floor(double x)
{
    return floor(x);
}
444
/* Thin wrapper around the C library ceil(). */
U_CAPI double U_EXPORT2
uprv_ceil(double x)
{
    return ceil(x);
}
450
/*
 * Rounds x by adding 0.5 and taking the floor, so exact halves move toward
 * positive infinity (for example -2.5 becomes -2.0).
 */
U_CAPI double U_EXPORT2
uprv_round(double x)
{
    return uprv_floor(x + 0.5);
}
456
/* Thin wrapper around the C library fabs(). */
U_CAPI double U_EXPORT2
uprv_fabs(double x)
{
    return fabs(x);
}
462
/* Thin wrapper around the C library modf(); y is passed through unchanged
   as modf's output pointer. */
U_CAPI double U_EXPORT2
uprv_modf(double x, double* y)
{
    return modf(x, y);
}
468
/* Thin wrapper around the C library fmod(). */
U_CAPI double U_EXPORT2
uprv_fmod(double x, double y)
{
    return fmod(x, y);
}
474
/* Thin wrapper around the C library pow(). */
U_CAPI double U_EXPORT2
uprv_pow(double x, double y)
{
    /* This is declared as "double pow(double x, double y)" */
    return pow(x, y);
}
481
/* Returns 10 raised to the integer power x, computed with the C library pow(). */
U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)
{
    return pow(10.0, (double)x);
}
487
488U_CAPI double U_EXPORT2
489uprv_fmax(double x, double y)
490{
491#if IEEE_754
492    /* first handle NaN*/
493    if(uprv_isNaN(x) || uprv_isNaN(y))
494        return uprv_getNaN();
495
496    /* check for -0 and 0*/
497    if(x == 0.0 && y == 0.0 && u_signBit(x))
498        return y;
499
500#endif
501
502    /* this should work for all flt point w/o NaN and Inf special cases */
503    return (x > y ? x : y);
504}
505
506U_CAPI double U_EXPORT2
507uprv_fmin(double x, double y)
508{
509#if IEEE_754
510    /* first handle NaN*/
511    if(uprv_isNaN(x) || uprv_isNaN(y))
512        return uprv_getNaN();
513
514    /* check for -0 and 0*/
515    if(x == 0.0 && y == 0.0 && u_signBit(y))
516        return y;
517
518#endif
519
520    /* this should work for all flt point w/o NaN and Inf special cases */
521    return (x > y ? y : x);
522}
523
524/**
525 * Truncates the given double.
526 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
527 * This is different than calling floor() or ceil():
528 * floor(3.3) = 3, floor(-3.3) = -4
529 * ceil(3.3) = 4, ceil(-3.3) = -3
530 */
531U_CAPI double U_EXPORT2
532uprv_trunc(double d)
533{
534#if IEEE_754
535    /* handle error cases*/
536    if(uprv_isNaN(d))
537        return uprv_getNaN();
538    if(uprv_isInfinite(d))
539        return uprv_getInfinity();
540
541    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
542        return ceil(d);
543    else
544        return floor(d);
545
546#else
547    return d >= 0 ? floor(d) : ceil(d);
548
549#endif
550}
551
/**
 * Return the largest positive number that can be represented by an integer
 * type of arbitrary bit length.
 *
 * Computed as 2^(DBL_MANT_DIG + 1) - 1 in double arithmetic.
 * NOTE(review): when DBL_MANT_DIG is 53 the exact value 2^54 - 1 needs 54
 * significant bits, so the subtraction may round - confirm the intended
 * exponent with callers before relying on the exact result.
 */
U_CAPI double U_EXPORT2
uprv_maxMantissa(void)
{
    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
}
561
/* Thin wrapper around the C library log(). */
U_CAPI double U_EXPORT2
uprv_log(double d)
{
    return log(d);
}
567
/*
 * Returns an upper-bound pointer derived from base.
 * On OS400 the bound depends on whether base tests as a TERASPACE pointer
 * (see the comment in the body); on every other platform the result is
 * whatever the U_MAX_PTR macro yields for base.
 */
U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)
{
#if defined(OS400)
    /*
     * With the provided function we should never be out of range of a given segment
     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
     * id and 3 bytes for the offset.  The key is that the casting takes care of
     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
     * seen in a program is x001000 and when casted to an int would be 0.
     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
     *
     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
     * This function determines the activation based on the pointer that is passed in and
     * calculates the appropriate maximum available size for
     * each pointer type (TERASPACE and non-TERASPACE)
     *
     * Unlike other operating systems, the pointer model isn't determined at
     * compile time on i5/OS.
     */
    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
        /* if it is a TERASPACE pointer the max is 2GB - 4k */
        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
    }
    /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));

#else
    return U_MAX_PTR(base);
#endif
}
600
601/*---------------------------------------------------------------------------
602  Platform-specific Implementations
603  Try these, and if they don't work on your platform, then special case your
604  platform with new implementations.
605  ---------------------------------------------------------------------------*/
606
607/* Generic time zone layer -------------------------------------------------- */
608
609/* Time zone utilities */
/*
 * Initializes the platform time zone state by invoking U_TZSET() when that
 * macro is defined; otherwise does nothing.
 */
U_CAPI void U_EXPORT2
uprv_tzset()
{
#ifdef U_TZSET
    U_TZSET();
#else
    /* no initialization*/
#endif
}
619
/*
 * Returns the offset between GMT and local time, in seconds.
 *
 * When U_TIMEZONE is defined its value is returned directly. Otherwise the
 * offset is computed as mktime(gmtime(now)) - mktime(localtime(now)), i.e.
 * GMT minus local time. Except on U_IOS, one hour (3600 s) is added when the
 * local broken-down time has a non-zero tm_isdst.
 */
U_CAPI int32_t U_EXPORT2
uprv_timezone()
{
#ifdef U_TIMEZONE
    return U_TIMEZONE;
#else
    time_t t, t1, t2;
    struct tm tmrec;
#ifndef U_IOS
    UBool dst_checked;
#endif
    int32_t tdiff = 0;

    time(&t);
    uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
#ifndef U_IOS
    dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
#endif
    t1 = mktime(&tmrec);                 /* local time in seconds*/
    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
    t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
    tdiff = t2 - t1;
#ifndef U_IOS
    /* On iOS the calculated tdiff is correct and doesn't need this dst
       shift applied. */
    /* imitate NT behaviour, which returns same timezone offset to GMT for
       winter and summer*/
    if (dst_checked)
        tdiff += 3600;
#endif
    return tdiff;
#endif
}
653
654/* Note that U_TZNAME does *not* have to be tzname, but if it is,
655   some platforms need to have it declared here. */
656
657#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
658/* RS6000 and others reject char **tzname.  */
659extern U_IMPORT char *U_TZNAME[];
660#endif
661
#if !UCONFIG_NO_FILE_IO && ((defined(U_DARWIN) && !defined(U_IOS)) || defined(U_LINUX) || defined(U_BSD))
/* These platforms are likely to use Olson timezone IDs. */
#define CHECK_LOCALTIME_LINK 1
/* TZDEFAULT is the system's default time zone file and TZZONEINFO the
   directory prefix holding the zone files; on Darwin both come from
   <tzfile.h> (TZZONEINFO is built from its TZDIR). */
#if defined(U_DARWIN)
#include <tzfile.h>
#define TZZONEINFO      (TZDIR "/")
#else
#define TZDEFAULT       "/etc/localtime"
#define TZZONEINFO      "/usr/share/zoneinfo/"
#endif
#if U_HAVE_DIRENT_H
#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
   symlinked to /etc/localtime, which makes searchForTZFile return
   'localtime' when it's the first match. */
#define TZFILE_SKIP2    "localtime"
/* Enables the directory-search code guarded by #ifdef SEARCH_TZFILE. */
#define SEARCH_TZFILE
#include <dirent.h>  /* Needed to search through system timezone files */
#endif
/* NOTE(review): presumably hold the detected default time zone ID; their
   users are outside this part of the file - confirm. */
static char gTimeZoneBuffer[PATH_MAX];
static char *gTimeZoneBufferPtr = NULL;
#endif
684
685#ifndef U_WINDOWS
686#define isNonDigit(ch) (ch < '0' || '9' < ch)
687static UBool isValidOlsonID(const char *id) {
688    int32_t idx = 0;
689
690    /* Determine if this is something like Iceland (Olson ID)
691    or AST4ADT (non-Olson ID) */
692    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
693        idx++;
694    }
695
696    /* If we went through the whole string, then it might be okay.
697    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
698    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
699    The rest of the time it could be an Olson ID. George */
700    return (UBool)(id[idx] == 0
701        || uprv_strcmp(id, "PST8PDT") == 0
702        || uprv_strcmp(id, "MST7MDT") == 0
703        || uprv_strcmp(id, "CST6CDT") == 0
704        || uprv_strcmp(id, "EST5EDT") == 0);
705}
706
/* Some Unix-like systems keep 'posix' and 'right' subdirectories under
   /usr/share/zoneinfo. 'posix' replicates the top-level contents; 'right'
   has the same set of file names, but its files differ because they are
   for TAI (Int'l Atomic Time) while 'posix' has files for UTC.
   If the first match for /etc/localtime lands in one of them (usually
   'posix', since the 'right' files have different contents), or the TZ
   environment variable points into one of them, createTimeZone fails:
   'posix/America/New_York' is not an Olson timezone id, whereas
   'America/New_York' is. So a leading 'posix/' or 'right/' is stepped
   over by advancing *id past it. */
static void skipZoneIDPrefix(const char** id) {
    const char* zone = *id;

    if (uprv_strncmp(zone, "posix/", 6) != 0
        && uprv_strncmp(zone, "right/", 6) != 0)
    {
        /* No known prefix: leave the caller's pointer alone. */
        return;
    }
    *id = zone + 6;
}
726#endif
727
728#if defined(U_TZNAME) && !defined(U_WINDOWS)
729
/* Converts an offset in hours to seconds, truncated to int32_t. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the abbreviation-to-Olson lookup table: the GMT offset, the
   daylight pattern and the standard/daylight abbreviations that together
   identify the Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;
738
/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.

Each row is {offsetSeconds, daylightType, stdID, dstID, olsonID}. Offsets are
positive for zones west of GMT (e.g. 18000 for US/Eastern) and negative for
zones east of it. remapShortTimeZone() returns the first row whose first four
fields all match, so row order matters where rows would otherwise collide.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};
809
810/*#define DEBUG_TZNAME*/
811
812static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
813{
814    int32_t idx;
815#ifdef DEBUG_TZNAME
816    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
817#endif
818    for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
819    {
820        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
821            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
822            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
823            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
824        {
825            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
826        }
827    }
828    return NULL;
829}
830#endif
831
832#ifdef SEARCH_TZFILE
#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
#define MAX_READ_SIZE 512      /* Chunk size, in bytes, used when reading a candidate file. */

/* State shared across calls to compareBinaryFiles() so that the default time
   zone file is opened, measured and read only once. */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;      /* contents of the default TZ file; NULL until first read */
    int64_t defaultTZFileSize;  /* size of the default TZ file; 0 until measured */
    FILE* defaultTZFilePtr;     /* open handle on the default TZ file; NULL until opened */
    UBool defaultTZstatus;      /* NOTE(review): not used by compareBinaryFiles() - confirm purpose with callers */
    int32_t defaultTZPosition;  /* current comparison offset into defaultTZBuffer */
} DefaultTZInfo;
843
844/*
845 * This method compares the two files given to see if they are a match.
846 * It is currently use to compare two TZ files.
847 */
848static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
849    FILE* file;
850    int64_t sizeFile;
851    int64_t sizeFileLeft;
852    int32_t sizeFileRead;
853    int32_t sizeFileToRead;
854    char bufferFile[MAX_READ_SIZE];
855    UBool result = TRUE;
856
857    if (tzInfo->defaultTZFilePtr == NULL) {
858        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
859    }
860    file = fopen(TZFileName, "r");
861
862    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
863
864    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
865        /* First check that the file size are equal. */
866        if (tzInfo->defaultTZFileSize == 0) {
867            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
868            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
869        }
870        fseek(file, 0, SEEK_END);
871        sizeFile = ftell(file);
872        sizeFileLeft = sizeFile;
873
874        if (sizeFile != tzInfo->defaultTZFileSize) {
875            result = FALSE;
876        } else {
877            /* Store the data from the files in seperate buffers and
878             * compare each byte to determine equality.
879             */
880            if (tzInfo->defaultTZBuffer == NULL) {
881                rewind(tzInfo->defaultTZFilePtr);
882                tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
883                fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
884            }
885            rewind(file);
886            while(sizeFileLeft > 0) {
887                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
888                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
889
890                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
891                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
892                    result = FALSE;
893                    break;
894                }
895                sizeFileLeft -= sizeFileRead;
896                tzInfo->defaultTZPosition += sizeFileRead;
897            }
898        }
899    } else {
900        result = FALSE;
901    }
902
903    if (file != NULL) {
904        fclose(file);
905    }
906
907    return result;
908}
909/*
910 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
911 */
/* dirent also lists two entries: "." and ".." that we can safely ignore. */
#define SKIP1 "."
#define SKIP2 ".."
/* NOTE(review): presumably receives the path found by searchForTZFile();
   its writers are outside this part of the file - confirm. */
static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
916static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
917    char curpath[MAX_PATH_SIZE];
918    DIR* dirp = opendir(path);
919    DIR* subDirp = NULL;
920    struct dirent* dirEntry = NULL;
921
922    char* result = NULL;
923    if (dirp == NULL) {
924        return result;
925    }
926
927    /* Save the current path */
928    uprv_memset(curpath, 0, MAX_PATH_SIZE);
929    uprv_strcpy(curpath, path);
930
931    /* Check each entry in the directory. */
932    while((dirEntry = readdir(dirp)) != NULL) {
933        const char* dirName = dirEntry->d_name;
934        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
935            /* Create a newpath with the new entry to test each entry in the directory. */
936            char newpath[MAX_PATH_SIZE];
937            uprv_strcpy(newpath, curpath);
938            uprv_strcat(newpath, dirName);
939
940            if ((subDirp = opendir(newpath)) != NULL) {
941                /* If this new path is a directory, make a recursive call with the newpath. */
942                closedir(subDirp);
943                uprv_strcat(newpath, "/");
944                result = searchForTZFile(newpath, tzInfo);
945                /*
946                 Have to get out here. Otherwise, we'd keep looking
947                 and return the first match in the top-level directory
948                 if there's a match in the top-level. If not, this function
949                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
950                 It worked without this in most cases because we have a fallback of calling
951                 localtime_r to figure out the default timezone.
952                */
953                if (result != NULL)
954                    break;
955            } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
956                if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
957                    const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
958                    skipZoneIDPrefix(&zoneid);
959                    uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
960                    result = SEARCH_TZFILE_RESULT;
961                    /* Get out after the first one found. */
962                    break;
963                }
964            }
965        }
966    }
967    closedir(dirp);
968    return result;
969}
970#endif
971U_CAPI const char* U_EXPORT2
972uprv_tzname(int n)
973{
974    const char *tzid = NULL;
975#ifdef U_WINDOWS
976    tzid = uprv_detectWindowsTimeZone();
977
978    if (tzid != NULL) {
979        return tzid;
980    }
981#else
982
983/*#if defined(U_DARWIN)
984    int ret;
985
986    tzid = getenv("TZFILE");
987    if (tzid != NULL) {
988        return tzid;
989    }
990#endif*/
991
992/* This code can be temporarily disabled to test tzname resolution later on. */
993#ifndef DEBUG_TZNAME
994    tzid = getenv("TZ");
995    if (tzid != NULL && isValidOlsonID(tzid))
996    {
997        /* This might be a good Olson ID. */
998        skipZoneIDPrefix(&tzid);
999        return tzid;
1000    }
1001    /* else U_TZNAME will give a better result. */
1002#endif
1003
1004#if defined(CHECK_LOCALTIME_LINK)
1005    /* Caller must handle threading issues */
1006    if (gTimeZoneBufferPtr == NULL) {
1007        /*
1008        This is a trick to look at the name of the link to get the Olson ID
1009        because the tzfile contents is underspecified.
1010        This isn't guaranteed to work because it may not be a symlink.
1011        */
1012        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1013        if (0 < ret) {
1014            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1015            gTimeZoneBuffer[ret] = 0;
1016            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1017                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1018            {
1019                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1020            }
1021        } else {
1022#if defined(SEARCH_TZFILE)
1023            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1024            if (tzInfo != NULL) {
1025                tzInfo->defaultTZBuffer = NULL;
1026                tzInfo->defaultTZFileSize = 0;
1027                tzInfo->defaultTZFilePtr = NULL;
1028                tzInfo->defaultTZstatus = FALSE;
1029                tzInfo->defaultTZPosition = 0;
1030
1031                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1032
1033                /* Free previously allocated memory */
1034                if (tzInfo->defaultTZBuffer != NULL) {
1035                    uprv_free(tzInfo->defaultTZBuffer);
1036                }
1037                if (tzInfo->defaultTZFilePtr != NULL) {
1038                    fclose(tzInfo->defaultTZFilePtr);
1039                }
1040                uprv_free(tzInfo);
1041            }
1042
1043            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1044                return gTimeZoneBufferPtr;
1045            }
1046#endif
1047        }
1048    }
1049    else {
1050        return gTimeZoneBufferPtr;
1051    }
1052#endif
1053#endif
1054
1055#ifdef U_TZNAME
1056#ifdef U_WINDOWS
1057    /* The return value is free'd in timezone.cpp on Windows because
1058     * the other code path returns a pointer to a heap location. */
1059    return uprv_strdup(U_TZNAME[n]);
1060#else
1061    /*
1062    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1063    So we remap the abbreviation to an olson ID.
1064
1065    Since Windows exposes a little more timezone information,
1066    we normally don't use this code on Windows because
1067    uprv_detectWindowsTimeZone should have already given the correct answer.
1068    */
1069    {
1070        struct tm juneSol, decemberSol;
1071        int daylightType;
1072        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1073        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1074
1075        /* This probing will tell us when daylight savings occurs.  */
1076        localtime_r(&juneSolstice, &juneSol);
1077        localtime_r(&decemberSolstice, &decemberSol);
1078        daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1079        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1080        if (tzid != NULL) {
1081            return tzid;
1082        }
1083    }
1084    return U_TZNAME[n];
1085#endif
1086#else
1087    return "";
1088#endif
1089}
1090
1091/* Get and set the ICU data directory --------------------------------------- */
1092
1093static char *gDataDirectory = NULL;
1094#if U_POSIX_LOCALE
1095 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1096#endif
1097
1098static UBool U_CALLCONV putil_cleanup(void)
1099{
1100    if (gDataDirectory && *gDataDirectory) {
1101        uprv_free(gDataDirectory);
1102    }
1103    gDataDirectory = NULL;
1104#if U_POSIX_LOCALE
1105    if (gCorrectedPOSIXLocale) {
1106        uprv_free(gCorrectedPOSIXLocale);
1107        gCorrectedPOSIXLocale = NULL;
1108    }
1109#endif
1110    return TRUE;
1111}
1112
1113/*
1114 * Set the data directory.
1115 *    Make a copy of the passed string, and set the global data dir to point to it.
1116 *    TODO:  see bug #2849, regarding thread safety.
1117 */
1118U_CAPI void U_EXPORT2
1119u_setDataDirectory(const char *directory) {
1120    char *newDataDir;
1121    int32_t length;
1122
1123    if(directory==NULL || *directory==0) {
1124        /* A small optimization to prevent the malloc and copy when the
1125        shared library is used, and this is a way to make sure that NULL
1126        is never returned.
1127        */
1128        newDataDir = (char *)"";
1129    }
1130    else {
1131        length=(int32_t)uprv_strlen(directory);
1132        newDataDir = (char *)uprv_malloc(length + 2);
1133        /* Exit out if newDataDir could not be created. */
1134        if (newDataDir == NULL) {
1135            return;
1136        }
1137        uprv_strcpy(newDataDir, directory);
1138
1139#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1140        {
1141            char *p;
1142            while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1143                *p = U_FILE_SEP_CHAR;
1144            }
1145        }
1146#endif
1147    }
1148
1149    umtx_lock(NULL);
1150    if (gDataDirectory && *gDataDirectory) {
1151        uprv_free(gDataDirectory);
1152    }
1153    gDataDirectory = newDataDir;
1154    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1155    umtx_unlock(NULL);
1156}
1157
1158U_CAPI UBool U_EXPORT2
1159uprv_pathIsAbsolute(const char *path)
1160{
1161  if(!path || !*path) {
1162    return FALSE;
1163  }
1164
1165  if(*path == U_FILE_SEP_CHAR) {
1166    return TRUE;
1167  }
1168
1169#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1170  if(*path == U_FILE_ALT_SEP_CHAR) {
1171    return TRUE;
1172  }
1173#endif
1174
1175#if defined(U_WINDOWS)
1176  if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1177       ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1178      path[1] == ':' ) {
1179    return TRUE;
1180  }
1181#endif
1182
1183  return FALSE;
1184}
1185
1186/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1187   until some client wrapper makefiles are updated */
1188#if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1189# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1190#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1191# endif
1192#endif
1193
1194U_CAPI const char * U_EXPORT2
1195u_getDataDirectory(void) {
1196    const char *path = NULL;
1197#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1198    char datadir_path_buffer[PATH_MAX];
1199#endif
1200
1201    /* if we have the directory, then return it immediately */
1202    UMTX_CHECK(NULL, gDataDirectory, path);
1203
1204    if(path) {
1205        return path;
1206    }
1207
1208    /*
1209    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1210    override ICU's data with the ICU_DATA environment variable. This prevents
1211    problems where multiple custom copies of ICU's specific version of data
1212    are installed on a system. Either the application must define the data
1213    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1214    ICU, set the data with udata_setCommonData or trust that all of the
1215    required data is contained in ICU's data library that contains
1216    the entry point defined by U_ICUDATA_ENTRY_POINT.
1217
1218    There may also be some platforms where environment variables
1219    are not allowed.
1220    */
1221#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1222    /* First try to get the environment variable */
1223    path=getenv("ICU_DATA");
1224#   endif
1225
1226    /* ICU_DATA_DIR may be set as a compile option.
1227     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1228     * and is used only when data is built in archive mode eliminating the need
1229     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1230     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1231     * set their own path.
1232     */
1233#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1234    if(path==NULL || *path==0) {
1235# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1236        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1237# endif
1238# ifdef ICU_DATA_DIR
1239        path=ICU_DATA_DIR;
1240# else
1241        path=U_ICU_DATA_DEFAULT_DIR;
1242# endif
1243# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1244        if (prefix != NULL) {
1245            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1246            path=datadir_path_buffer;
1247        }
1248# endif
1249    }
1250#endif
1251
1252    if(path==NULL) {
1253        /* It looks really bad, set it to something. */
1254        path = "";
1255    }
1256
1257    u_setDataDirectory(path);
1258    return gDataDirectory;
1259}
1260
1261
1262
1263
1264
1265/* Macintosh-specific locale information ------------------------------------ */
1266#ifdef XP_MAC
1267
/*
 * One row of the Mac locale lookup table: the script, region, language and
 * date-region codes to match, and the POSIX locale ID the row maps to.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
/* Table value that matches any input in the lookup loop. */
#define MAC_LC_MAGIC_NUMBER (-5)
/* Initial value of the lookup inputs before they are read from the system. */
#define MAC_LC_INIT_NUMBER (-9)

/*
 * Rows are tested in order and the first match wins, so the catch-all
 * fallback must stay last.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },  /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },  /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },  /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },  /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },  /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },  /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },  /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },  /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },  /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel. This row used to carry "is_IS", which is the Iceland ID (see 21). */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" }, /* Israel */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland */
    /* No mapping yet: 22 (Malta), 23 (Cyprus) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    /* No mapping yet: 33 (Hindi system for India), 34 (Pakistan) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    /* No mapping yet: 46 (Lapland  [Ask Rich for the data. HS]), 47 (Faeroe Islands) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1367
1368#endif
1369
1370#if U_POSIX_LOCALE
/* Shared helper for uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the POSIX locale id for
 * LC_CTYPE or LC_MESSAGES; any other category yields "en_US_POSIX".
 * The result is never NULL, and is never "C" or "POSIX".
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* localeName = NULL;

    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * Ask the C library first, because an application can set this.
        * On Solaris two different calls to setlocale can result in
        * different values, so callers only fetch this value once.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        localeName = setlocale(category, NULL);

        if (localeName == NULL
            || uprv_strcmp("C", localeName) == 0
            || uprv_strcmp("POSIX", localeName) == 0)
        {
            /* Nothing useful from libc: consult LC_ALL, the category's own variable, then LANG. */
            localeName = getenv("LC_ALL");
            if (localeName == NULL) {
                localeName = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
            }
            if (localeName == NULL) {
                localeName = getenv("LANG");
            }
        }
    }

    if (localeName == NULL
        || uprv_strcmp("C", localeName) == 0
        || uprv_strcmp("POSIX", localeName) == 0)
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        return "en_US_POSIX";
    }
    return localeName;
}
1421
/* Return the raw POSIX id for the default locale, exactly as found.
 * The value comes from LC_MESSAGES (and indirectly LC_ALL and LANG) and is
 * looked up only on the first call; later calls return the same pointer.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1433
/* Return the raw POSIX id for the default codepage, exactly as found.
 * The value comes from LC_CTYPE (and indirectly LC_ALL and LANG) and is
 * looked up only on the first call; later calls return the same pointer.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1445#endif
1446
1447/* NOTE: The caller should handle thread safety */
1448U_CAPI const char* U_EXPORT2
1449uprv_getDefaultLocaleID()
1450{
1451#if U_POSIX_LOCALE
1452/*
1453  Note that:  (a '!' means the ID is improper somehow)
1454     LC_ALL  ---->     default_loc          codepage
1455--------------------------------------------------------
1456     ab.CD             ab                   CD
1457     ab@CD             ab__CD               -
1458     ab@CD.EF          ab__CD               EF
1459
1460     ab_CD.EF@GH       ab_CD_GH             EF
1461
1462Some 'improper' ways to do the same as above:
1463  !  ab_CD@GH.EF       ab_CD_GH             EF
1464  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1465  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1466
1467     _CD@GH            _CD_GH               -
1468     _CD.EF@GH         _CD_GH               EF
1469
1470The variant cannot have dots in it.
1471The 'rightmost' variant (@xxx) wins.
1472The leftmost codepage (.xxx) wins.
1473*/
1474    char *correctedPOSIXLocale = 0;
1475    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1476    const char *p;
1477    const char *q;
1478    int32_t len;
1479
1480    /* Format: (no spaces)
1481    ll [ _CC ] [ . MM ] [ @ VV]
1482
1483      l = lang, C = ctry, M = charmap, V = variant
1484    */
1485
1486    if (gCorrectedPOSIXLocale != NULL) {
1487        return gCorrectedPOSIXLocale;
1488    }
1489
1490    if ((p = uprv_strchr(posixID, '.')) != NULL) {
1491        /* assume new locale can't be larger than old one? */
1492        correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1493        /* Exit on memory allocation error. */
1494        if (correctedPOSIXLocale == NULL) {
1495            return NULL;
1496        }
1497        uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1498        correctedPOSIXLocale[p-posixID] = 0;
1499
1500        /* do not copy after the @ */
1501        if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1502            correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1503        }
1504    }
1505
1506    /* Note that we scan the *uncorrected* ID. */
1507    if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1508        if (correctedPOSIXLocale == NULL) {
1509            correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1510            /* Exit on memory allocation error. */
1511            if (correctedPOSIXLocale == NULL) {
1512                return NULL;
1513            }
1514            uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1515            correctedPOSIXLocale[p-posixID] = 0;
1516        }
1517        p++;
1518
1519        /* Take care of any special cases here.. */
1520        if (!uprv_strcmp(p, "nynorsk")) {
1521            p = "NY";
1522            /* Don't worry about no__NY. In practice, it won't appear. */
1523        }
1524
1525        if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1526            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1527        }
1528        else {
1529            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1530        }
1531
1532        if ((q = uprv_strchr(p, '.')) != NULL) {
1533            /* How big will the resulting string be? */
1534            len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1535            uprv_strncat(correctedPOSIXLocale, p, q-p);
1536            correctedPOSIXLocale[len] = 0;
1537        }
1538        else {
1539            /* Anything following the @ sign */
1540            uprv_strcat(correctedPOSIXLocale, p);
1541        }
1542
1543        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1544         * How about 'russian' -> 'ru'?
1545         * Many of the other locales using ISO codes will be handled by the
1546         * canonicalization functions in uloc_getDefault.
1547         */
1548    }
1549
1550    /* Was a correction made? */
1551    if (correctedPOSIXLocale != NULL) {
1552        posixID = correctedPOSIXLocale;
1553    }
1554    else {
1555        /* copy it, just in case the original pointer goes away.  See j2395 */
1556        correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1557        /* Exit on memory allocation error. */
1558        if (correctedPOSIXLocale == NULL) {
1559            return NULL;
1560        }
1561        posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1562    }
1563
1564    if (gCorrectedPOSIXLocale == NULL) {
1565        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1566        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1567        correctedPOSIXLocale = NULL;
1568    }
1569
1570    if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1571        uprv_free(correctedPOSIXLocale);
1572    }
1573
1574    return posixID;
1575
1576#elif defined(U_WINDOWS)
1577    UErrorCode status = U_ZERO_ERROR;
1578    LCID id = GetThreadLocale();
1579    const char* locID = uprv_convertToPosix(id, &status);
1580
1581    if (U_FAILURE(status)) {
1582        locID = "en_US";
1583    }
1584    return locID;
1585
1586#elif defined(XP_MAC)
1587    int32_t script = MAC_LC_INIT_NUMBER;
1588    /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1589    int32_t region = MAC_LC_INIT_NUMBER;
1590    /* = GetScriptManagerVariable(smRegionCode);*/
1591    int32_t lang = MAC_LC_INIT_NUMBER;
1592    /* = GetScriptManagerVariable(smScriptLang);*/
1593    int32_t date_region = MAC_LC_INIT_NUMBER;
1594    const char* posixID = 0;
1595    int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1596    int32_t i;
1597    Intl1Hndl ih;
1598
1599    ih = (Intl1Hndl) GetIntlResource(1);
1600    if (ih)
1601        date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1602
1603    for (i = 0; i < count; i++) {
1604        if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1605             || (mac_lc_recs[i].script == script))
1606            && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1607             || (mac_lc_recs[i].region == region))
1608            && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1609             || (mac_lc_recs[i].lang == lang))
1610            && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1611             || (mac_lc_recs[i].date_region == date_region))
1612            )
1613        {
1614            posixID = mac_lc_recs[i].posixID;
1615            break;
1616        }
1617    }
1618
1619    return posixID;
1620
1621#elif defined(OS400)
1622    /* locales are process scoped and are by definition thread safe */
1623    static char correctedLocale[64];
1624    const  char *localeID = getenv("LC_ALL");
1625           char *p;
1626
1627    if (localeID == NULL)
1628        localeID = getenv("LANG");
1629    if (localeID == NULL)
1630        localeID = setlocale(LC_ALL, NULL);
1631    /* Make sure we have something... */
1632    if (localeID == NULL)
1633        return "en_US_POSIX";
1634
1635    /* Extract the locale name from the path. */
1636    if((p = uprv_strrchr(localeID, '/')) != NULL)
1637    {
1638        /* Increment p to start of locale name. */
1639        p++;
1640        localeID = p;
1641    }
1642
1643    /* Copy to work location. */
1644    uprv_strcpy(correctedLocale, localeID);
1645
1646    /* Strip off the '.locale' extension. */
1647    if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1648        *p = 0;
1649    }
1650
1651    /* Upper case the locale name. */
1652    T_CString_toUpperCase(correctedLocale);
1653
1654    /* See if we are using the POSIX locale.  Any of the
1655    * following are equivalent and use the same QLGPGCMA
1656    * (POSIX) locale.
1657    * QLGPGCMA2 means UCS2
1658    * QLGPGCMA_4 means UTF-32
1659    * QLGPGCMA_8 means UTF-8
1660    */
1661    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1662        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1663        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1664    {
1665        uprv_strcpy(correctedLocale, "en_US_POSIX");
1666    }
1667    else
1668    {
1669        int16_t LocaleLen;
1670
1671        /* Lower case the lang portion. */
1672        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1673        {
1674            *p = uprv_tolower(*p);
1675        }
1676
1677        /* Adjust for Euro.  After '_E' add 'URO'. */
1678        LocaleLen = uprv_strlen(correctedLocale);
1679        if (correctedLocale[LocaleLen - 2] == '_' &&
1680            correctedLocale[LocaleLen - 1] == 'E')
1681        {
1682            uprv_strcat(correctedLocale, "URO");
1683        }
1684
1685        /* If using Lotus-based locale then convert to
1686         * equivalent non Lotus.
1687         */
1688        else if (correctedLocale[LocaleLen - 2] == '_' &&
1689            correctedLocale[LocaleLen - 1] == 'L')
1690        {
1691            correctedLocale[LocaleLen - 2] = 0;
1692        }
1693
1694        /* There are separate simplified and traditional
1695         * locales called zh_HK_S and zh_HK_T.
1696         */
1697        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1698        {
1699            uprv_strcpy(correctedLocale, "zh_HK");
1700        }
1701
1702        /* A special zh_CN_GBK locale...
1703        */
1704        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1705        {
1706            uprv_strcpy(correctedLocale, "zh_CN");
1707        }
1708
1709    }
1710
1711    return correctedLocale;
1712#endif
1713
1714}
1715
1716#if !U_CHARSET_IS_UTF8
1717#if U_POSIX_LOCALE
1718/*
1719Due to various platform differences, one platform may specify a charset,
1720when they really mean a different charset. Remap the names so that they are
1721compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1722here. Before adding anything to this function, please consider adding unique
1723names to the ICU alias table in the data directory.
1724*/
/*
 * Map a platform-reported charset name to the name ICU should use.
 *
 * locale  POSIX locale ID the charset came from; NULL or "" means "unknown".
 * name    charset name as reported by the platform; may be NULL.
 *
 * Returns NULL when name is NULL or empty, a string literal when the name is
 * remapped for the current platform, and name itself otherwise.
 */
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    /* An empty locale string is handled exactly like a missing locale. */
    if (locale != NULL && locale[0] == '\0') {
        locale = NULL;
    }
    /* Without a charset name there is nothing to remap. */
    if (name == NULL) {
        return NULL;
    }
#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        return "Shift-JIS";
    }
    if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        return "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name; the locale disambiguates it. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            return "EUC-CN";
        }
        if (uprv_strcmp(locale, "zh_TW") == 0) {
            return "EUC-TW";
        }
        if (uprv_strcmp(locale, "ko_KR") == 0) {
            return "EUC-KR";
        }
        /* Any other locale keeps the plain "EUC" name. */
        return name;
    }
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
         * ISO-8859-1 instead of US-ASCII(646).
         */
        return "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == '\0') {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        return "UTF-8";
    }
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
    if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        return "UTF-8";
    }
#elif defined(U_BSD)
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
#elif defined(U_HPUX)
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        return "hkbig5";
    }
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name; the locale disambiguates it. */
        if (uprv_strcmp(locale, "korean") == 0) {
            return "EUC-KR";
        }
        if (uprv_strcmp(locale, "japanese") == 0) {
            /* Same target as the "eucjp" case below. */
            return "eucjis";
        }
        /* Any other locale keeps the plain "euc" name. */
        return name;
    }
    if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        return "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif
    /* An empty name carries no information: report it as NULL. */
    return (name[0] != '\0') ? name : NULL;
}
1846
/*
 * Extract the codeset part of a POSIX locale ID of the form
 * "<locale>.<codeset>[@<variant>]" and remap it for the current platform.
 *
 * localeName    POSIX locale ID to inspect; may be NULL.
 * buffer        receives the codeset text that follows the '.'.
 * buffCapacity  size of buffer in bytes, including the terminating NUL.
 *
 * Returns NULL when localeName is NULL or contains no '.', when buffer cannot
 * hold any text, or when the codeset is empty. Otherwise returns whatever
 * remapPlatformDependentCodepage() yields: either a pointer into buffer or a
 * remapped name.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *name = NULL;
    char *variant = NULL;
    size_t localeCapacity;

    /*
     * Without this check a non-positive capacity would be converted to a huge
     * strncpy length and buffer[buffCapacity-1] would be written out of bounds.
     */
    if (localeName == NULL || buffer == NULL || buffCapacity <= 0) {
        return NULL;
    }

    name = uprv_strchr(localeName, '.');
    if (name == NULL) {
        /* No codeset was specified in the locale ID. */
        return NULL;
    }

    /* Copy the part before the '.', truncated to fit localeBuf. */
    localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NUL termination */

    /* Copy the part after the '.', truncated to fit the caller's buffer. */
    name = uprv_strncpy(buffer, name+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NUL termination */

    /* Cut off an "@variant" suffix if one is present. */
    if ((variant = (uprv_strchr(name, '@'))) != NULL) {
        *variant = 0;
    }
    return remapPlatformDependentCodepage(localeBuf, name);
}
1867#endif
1868
/*
 * Determine the name of the platform's default codepage.
 *
 * The lookup is platform specific:
 *   OS/400   - job CCSID (or the job's default CCSID), formatted as "ibm-<n>";
 *              ibm-37 when the job information cannot be used.
 *   OS/390   - nl_langinfo(CODESET) followed by UCNV_SWAP_LFNL_OPTION_STRING.
 *   XP_MAC   - the fixed name "macintosh".
 *   Windows  - GetACP(), formatted as "windows-<n>".
 *   POSIX    - nl_langinfo() when available, then the codeset part of the
 *              POSIX locale ID, then "US-ASCII".
 *   other    - the fixed name "US-ASCII".
 *
 * Returns a pointer to a string literal or to a function-local static buffer;
 * the caller must not modify or free it.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned, so use an unsigned conversion. */
    sprintf(codepage, "ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* Room for the codeset name once the option suffix and the NUL are reserved. */
    const size_t nameCapacity = (sizeof(codepage) - 1) - strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    strncpy(codepage, nl_langinfo(CODESET), nameCapacity);
    /*
     * strncpy() does not terminate the destination when the source fills it,
     * so terminate explicitly before strcat() scans for the end of the string.
     */
    codepage[nameCapacity] = 0;
    strcat(codepage, UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[sizeof(codepage) - 1] = 0; /* NUL terminate */

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* GetACP() yields an unsigned value, so use an unsigned conversion. */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    localeName = uprv_getPOSIXIDForDefaultCodepage();
    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII.
         * The NULL test is defensive and mirrors getCodepageFromPOSIXID(),
         * which also tolerates a NULL locale ID.
         */
        if (localeName != NULL && uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            codeset = remapPlatformDependentCodepage(localeName, codeset);
        } else
#endif
        {
            codeset = remapPlatformDependentCodepage(NULL, codeset);
        }

        if (codeset != NULL) {
            /* Copy the name: nl_langinfo's storage may be reused later. */
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1964
1965
1966U_CAPI const char*  U_EXPORT2
1967uprv_getDefaultCodepage()
1968{
1969    static char const  *name = NULL;
1970    umtx_lock(NULL);
1971    if (name == NULL) {
1972        name = int_getDefaultCodepage();
1973    }
1974    umtx_unlock(NULL);
1975    return name;
1976}
1977#endif  /* !U_CHARSET_IS_UTF8 */
1978
1979
1980/* end of platform-specific implementation -------------- */
1981
1982/* version handling --------------------------------------------------------- */
1983
1984U_CAPI void U_EXPORT2
1985u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1986    char *end;
1987    uint16_t part=0;
1988
1989    if(versionArray==NULL) {
1990        return;
1991    }
1992
1993    if(versionString!=NULL) {
1994        for(;;) {
1995            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1996            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1997                break;
1998            }
1999            versionString=end+1;
2000        }
2001    }
2002
2003    while(part<U_MAX_VERSION_LENGTH) {
2004        versionArray[part++]=0;
2005    }
2006}
2007
2008U_CAPI void U_EXPORT2
2009u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2010    if(versionArray!=NULL && versionString!=NULL) {
2011        char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2012        int32_t len = u_strlen(versionString);
2013        if(len>U_MAX_VERSION_STRING_LENGTH) {
2014            len = U_MAX_VERSION_STRING_LENGTH;
2015        }
2016        u_UCharsToChars(versionString, versionChars, len);
2017        versionChars[len]=0;
2018        u_versionFromString(versionArray, versionChars);
2019    }
2020}
2021
2022U_CAPI void U_EXPORT2
2023u_versionToString(UVersionInfo versionArray, char *versionString) {
2024    uint16_t count, part;
2025    uint8_t field;
2026
2027    if(versionString==NULL) {
2028        return;
2029    }
2030
2031    if(versionArray==NULL) {
2032        versionString[0]=0;
2033        return;
2034    }
2035
2036    /* count how many fields need to be written */
2037    for(count=4; count>0 && versionArray[count-1]==0; --count) {
2038    }
2039
2040    if(count <= 1) {
2041        count = 2;
2042    }
2043
2044    /* write the first part */
2045    /* write the decimal field value */
2046    field=versionArray[0];
2047    if(field>=100) {
2048        *versionString++=(char)('0'+field/100);
2049        field%=100;
2050    }
2051    if(field>=10) {
2052        *versionString++=(char)('0'+field/10);
2053        field%=10;
2054    }
2055    *versionString++=(char)('0'+field);
2056
2057    /* write the following parts */
2058    for(part=1; part<count; ++part) {
2059        /* write a dot first */
2060        *versionString++=U_VERSION_DELIMITER;
2061
2062        /* write the decimal field value */
2063        field=versionArray[part];
2064        if(field>=100) {
2065            *versionString++=(char)('0'+field/100);
2066            field%=100;
2067        }
2068        if(field>=10) {
2069            *versionString++=(char)('0'+field/10);
2070            field%=10;
2071        }
2072        *versionString++=(char)('0'+field);
2073    }
2074
2075    /* NUL-terminate */
2076    *versionString=0;
2077}
2078
2079U_CAPI void U_EXPORT2
2080u_getVersion(UVersionInfo versionArray) {
2081    u_versionFromString(versionArray, U_ICU_VERSION);
2082}
2083
2084/**
2085 * icucfg.h dependent code
2086 */
2087
2088#if U_ENABLE_DYLOAD
2089
2090#if defined(U_CHECK_DYLOAD)
2091
2092#if defined(HAVE_DLOPEN)
2093
2094#ifdef HAVE_DLFCN_H
2095#ifdef __MVS__
2096#ifndef __SUSV3
2097#define __SUSV3 1
2098#endif
2099#endif
2100#include <dlfcn.h>
2101#endif
2102
2103U_INTERNAL void * U_EXPORT2
2104uprv_dl_open(const char *libName, UErrorCode *status) {
2105  void *ret = NULL;
2106  if(U_FAILURE(*status)) return ret;
2107  ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2108  if(ret==NULL) {
2109#ifndef U_TRACE_DYLOAD
2110    perror("dlopen");
2111#endif
2112    *status = U_MISSING_RESOURCE_ERROR;
2113  }
2114  return ret;
2115}
2116
2117U_INTERNAL void U_EXPORT2
2118uprv_dl_close(void *lib, UErrorCode *status) {
2119  if(U_FAILURE(*status)) return;
2120  dlclose(lib);
2121}
2122
2123U_INTERNAL void* U_EXPORT2
2124uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2125  void *ret = NULL;
2126  if(U_FAILURE(*status)) return ret;
2127  ret = dlsym(lib, sym);
2128  if(ret == NULL) {
2129    *status = U_MISSING_RESOURCE_ERROR;
2130  }
2131  return ret;
2132}
2133
2134#else
2135
2136/* null (nonexistent) implementation. */
2137
2138U_INTERNAL void * U_EXPORT2
2139uprv_dl_open(const char *libName, UErrorCode *status) {
2140  if(U_FAILURE(*status)) return NULL;
2141  *status = U_UNSUPPORTED_ERROR;
2142  return NULL;
2143}
2144
2145U_INTERNAL void U_EXPORT2
2146uprv_dl_close(void *lib, UErrorCode *status) {
2147  if(U_FAILURE(*status)) return;
2148  *status = U_UNSUPPORTED_ERROR;
2149  return;
2150}
2151
2152
2153U_INTERNAL void* U_EXPORT2
2154uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2155  if(U_FAILURE(*status)) return NULL;
2156  *status = U_UNSUPPORTED_ERROR;
2157  return NULL;
2158}
2159
2160
2161
2162#endif
2163
2164#elif defined U_WINDOWS
2165
2166U_INTERNAL void * U_EXPORT2
2167uprv_dl_open(const char *libName, UErrorCode *status) {
2168  HMODULE lib = NULL;
2169
2170  if(U_FAILURE(*status)) return NULL;
2171
2172  lib = LoadLibraryA(libName);
2173
2174  if(lib==NULL) {
2175    *status = U_MISSING_RESOURCE_ERROR;
2176  }
2177
2178  return (void*)lib;
2179}
2180
2181U_INTERNAL void U_EXPORT2
2182uprv_dl_close(void *lib, UErrorCode *status) {
2183  HMODULE handle = (HMODULE)lib;
2184  if(U_FAILURE(*status)) return;
2185
2186  FreeLibrary(handle);
2187
2188  return;
2189}
2190
2191
2192U_INTERNAL void* U_EXPORT2
2193uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2194  HMODULE handle = (HMODULE)lib;
2195  void * addr = NULL;
2196
2197  if(U_FAILURE(*status) || lib==NULL) return NULL;
2198
2199  addr = GetProcAddress(handle, sym);
2200
2201  if(addr==NULL) {
2202    DWORD lastError = GetLastError();
2203    if(lastError == ERROR_PROC_NOT_FOUND) {
2204      *status = U_MISSING_RESOURCE_ERROR;
2205    } else {
2206      *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2207    }
2208  }
2209
2210  return addr;
2211}
2212
2213
2214#else
2215
2216/* No dynamic loading set. */
2217
2218U_INTERNAL void * U_EXPORT2
2219uprv_dl_open(const char *libName, UErrorCode *status) {
2220    if(U_FAILURE(*status)) return NULL;
2221    *status = U_UNSUPPORTED_ERROR;
2222    return NULL;
2223}
2224
2225U_INTERNAL void U_EXPORT2
2226uprv_dl_close(void *lib, UErrorCode *status) {
2227    if(U_FAILURE(*status)) return;
2228    *status = U_UNSUPPORTED_ERROR;
2229    return;
2230}
2231
2232
2233U_INTERNAL void* U_EXPORT2
2234uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2235    if(U_FAILURE(*status)) return NULL;
2236    *status = U_UNSUPPORTED_ERROR;
2237    return NULL;
2238}
2239
2240
2241#endif
2242
2243#endif /* U_ENABLE_DYLOAD */
2244
2245/*
2246 * Hey, Emacs, please set the following:
2247 *
2248 * Local Variables:
2249 * indent-tabs-mode: nil
2250 * End:
2251 *
2252 */
2253