1/*
2*******************************************************************************
3* Copyright (C) 1997-2013, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*
7* File SMPDTFMT.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   02/19/97    aliu        Converted from java.
13*   03/31/97    aliu        Modified extensively to work with 50 locales.
14*   04/01/97    aliu        Added support for centuries.
15*   07/09/97    helena      Made ParsePosition into a class.
16*   07/21/98    stephen     Added initializeDefaultCentury.
17*                             Removed getZoneIndex (added in DateFormatSymbols)
18*                             Removed subParseLong
19*                             Removed chk
20*  02/22/99     stephen     Removed character literals for EBCDIC safety
21*   10/14/99    aliu        Updated 2-digit year parsing so that only "00" thru
22*                           "99" are recognized. {j28 4182066}
23*   11/15/99    weiv        Added support for week of year/day of week format
24********************************************************************************
25*/
26
// Size constant, presumably for time-zone ID key buffers (inferred from the
// name only). NOTE(review): it is not referenced in the visible part of this
// file -- confirm it is still needed.
#define ZID_KEY_MAX 128
28
29#include "unicode/utypes.h"
30
31#if !UCONFIG_NO_FORMATTING
32
33#include "unicode/smpdtfmt.h"
34#include "unicode/dtfmtsym.h"
35#include "unicode/ures.h"
36#include "unicode/msgfmt.h"
37#include "unicode/calendar.h"
38#include "unicode/gregocal.h"
39#include "unicode/timezone.h"
40#include "unicode/decimfmt.h"
41#include "unicode/dcfmtsym.h"
42#include "unicode/uchar.h"
43#include "unicode/uniset.h"
44#include "unicode/ustring.h"
45#include "unicode/basictz.h"
46#include "unicode/simpletz.h"
47#include "unicode/rbtz.h"
48#include "unicode/tzfmt.h"
49#include "unicode/utf16.h"
50#include "unicode/vtzone.h"
51#include "unicode/udisplaycontext.h"
52#include "olsontz.h"
53#include "patternprops.h"
54#include "fphdlimp.h"
55#include "gregoimp.h"
56#include "hebrwcal.h"
57#include "cstring.h"
58#include "uassert.h"
59#include "cmemory.h"
60#include "umutex.h"
61#include <float.h>
62#include "smpdtfst.h"
63
64#if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
65#include <stdio.h>
66#endif
67
68// *****************************************************************************
69// class SimpleDateFormat
70// *****************************************************************************
71
72U_NAMESPACE_BEGIN
73
// 0x40 is U+0040 ('@'), the code unit immediately before 'A' (0x41).
// NOTE(review): presumably the base subtracted from a pattern letter to get a
// table index; the use site is outside the visible part of this file.
static const UChar PATTERN_CHAR_BASE = 0x40;
75
76/**
77 * Last-resort string to use for "GMT" when constructing time zone strings.
78 */
79// For time zones that have no names, use strings GMT+minutes and
80// GMT-minutes. For instance, in France the time zone is GMT+60.
81// Also accepted are GMT+H:MM or GMT-H:MM.
82// Currently not being used
83//static const UChar gGmt[]      = {0x0047, 0x004D, 0x0054, 0x0000};         // "GMT"
84//static const UChar gGmtPlus[]  = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
85//static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
86//static const UChar gDefGmtPat[]       = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
87//static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
88//static const UChar gDefGmtNegHmPat[]  = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
89//static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
90//static const UChar gDefGmtPosHmPat[]  = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
91//static const UChar gUt[]       = {0x0055, 0x0054, 0x0000};  // "UT"
//static const UChar gUtc[]      = {0x0055, 0x0054, 0x0043, 0x0000};  // "UTC"
93
/**
 * Lengths, in UTF-16 code units and excluding the terminating NUL, of the
 * GMT/UT strings listed in the commented-out table above.
 */
enum GmtPatSize {
    // zone designators: "UT", "GMT", "UTC"
    kUtLen     = 2,
    kGmtLen    = 3,
    kUtcLen    = 3,

    // "GMT{0}" and the hour/minute offset patterns ("-HH:mm", "+HH:mm")
    kGmtPatLen = 6,
    kNegHmLen  = 6,
    kPosHmLen  = 6,

    // hour/minute/second offset patterns ("-HH:mm:ss", "+HH:mm:ss")
    kNegHmsLen = 9,
    kPosHmsLen = 9
};
104
105// Stuff needed for numbering system overrides
106
/**
 * Tag passed to processOverrideString() alongside a numbering-system
 * override string: date part, time part, or both.
 */
enum OvrStrType {
    kOvrStrDate = 0,
    kOvrStrTime,        // == 1
    kOvrStrBoth         // == 2
};
112
113static const UDateFormatField kDateFields[] = {
114    UDAT_YEAR_FIELD,
115    UDAT_MONTH_FIELD,
116    UDAT_DATE_FIELD,
117    UDAT_DAY_OF_YEAR_FIELD,
118    UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
119    UDAT_WEEK_OF_YEAR_FIELD,
120    UDAT_WEEK_OF_MONTH_FIELD,
121    UDAT_YEAR_WOY_FIELD,
122    UDAT_EXTENDED_YEAR_FIELD,
123    UDAT_JULIAN_DAY_FIELD,
124    UDAT_STANDALONE_DAY_FIELD,
125    UDAT_STANDALONE_MONTH_FIELD,
126    UDAT_QUARTER_FIELD,
127    UDAT_STANDALONE_QUARTER_FIELD,
128    UDAT_YEAR_NAME_FIELD };
129static const int8_t kDateFieldsCount = 15;
130
131static const UDateFormatField kTimeFields[] = {
132    UDAT_HOUR_OF_DAY1_FIELD,
133    UDAT_HOUR_OF_DAY0_FIELD,
134    UDAT_MINUTE_FIELD,
135    UDAT_SECOND_FIELD,
136    UDAT_FRACTIONAL_SECOND_FIELD,
137    UDAT_HOUR1_FIELD,
138    UDAT_HOUR0_FIELD,
139    UDAT_MILLISECONDS_IN_DAY_FIELD,
140    UDAT_TIMEZONE_RFC_FIELD,
141    UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD };
142static const int8_t kTimeFieldsCount = 10;
143
144
145// This is a pattern-of-last-resort used when we can't load a usable pattern out
146// of a resource.
147static const UChar gDefaultPattern[] =
148{
149    0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
150};  /* "yyyyMMdd hh:mm a" */
151
// This prefix is designed to NEVER MATCH real text, in order to
// suppress the parsing of negative numbers.  Adjust as needed (if
// this becomes valid Unicode).
// The array holds the single code unit 0xAB00 followed by a NUL terminator.
static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};

/**
 * These are the tags we expect to see in normal resource bundle files associated
 * with a locale.
 * gDateTimePatternsTag is the key construct() passes to CalendarData::getByKey()
 * to fetch the table of date/time pattern strings.
 */
static const char gDateTimePatternsTag[]="DateTimePatterns";

//static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC"
// U+0027 APOSTROPHE; _format() uses it to recognize quoted literal text and
// doubled-quote escapes inside a pattern.
static const UChar QUOTE = 0x27; // Single quote
165
166/*
167 * The field range check bias for each UDateFormatField.
168 * The bias is added to the minimum and maximum values
169 * before they are compared to the parsed number.
170 * For example, the calendar stores zero-based month numbers
171 * but the parsed month numbers start at 1, so the bias is 1.
172 *
173 * A value of -1 means that the value is not checked.
174 */
// One entry per UDateFormatField, in enum order (34 entries, 'G' .. 'x').
// The table is indexed by field, so every entry MUST be followed by a comma:
// a missing comma silently folds two adjacent "-1" entries into a single
// "-2" and shifts every later field down by one slot.
static const int32_t gFieldRangeBias[] = {
    -1,  // 'G' - UDAT_ERA_FIELD
    -1,  // 'y' - UDAT_YEAR_FIELD
     1,  // 'M' - UDAT_MONTH_FIELD
     0,  // 'd' - UDAT_DATE_FIELD
    -1,  // 'k' - UDAT_HOUR_OF_DAY1_FIELD
    -1,  // 'H' - UDAT_HOUR_OF_DAY0_FIELD
     0,  // 'm' - UDAT_MINUTE_FIELD
     0,  // 's' - UDAT_SECOND_FIELD
    -1,  // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
    -1,  // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
    -1,  // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
    -1,  // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
    -1,  // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
    -1,  // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
    -1,  // 'a' - UDAT_AM_PM_FIELD
    -1,  // 'h' - UDAT_HOUR1_FIELD
    -1,  // 'K' - UDAT_HOUR0_FIELD
    -1,  // 'z' - UDAT_TIMEZONE_FIELD
    -1,  // 'Y' - UDAT_YEAR_WOY_FIELD
    -1,  // 'e' - UDAT_DOW_LOCAL_FIELD
    -1,  // 'u' - UDAT_EXTENDED_YEAR_FIELD
    -1,  // 'g' - UDAT_JULIAN_DAY_FIELD
    -1,  // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
    -1,  // 'Z' - UDAT_TIMEZONE_RFC_FIELD
    -1,  // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
     0,  // 'c' - UDAT_STANDALONE_DAY_FIELD
     1,  // 'L' - UDAT_STANDALONE_MONTH_FIELD
    -1,  // 'Q' - UDAT_QUARTER_FIELD (1-4?)
    -1,  // 'q' - UDAT_STANDALONE_QUARTER_FIELD
    -1,  // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
    -1,  // 'U' - UDAT_YEAR_NAME_FIELD
    -1,  // 'O' - UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD
    -1,  // 'X' - UDAT_TIMEZONE_ISO_FIELD
    -1,  // 'x' - UDAT_TIMEZONE_ISO_LOCAL_FIELD
};
211
// When the calendar uses hebr numbering (i.e. he@calendar=hebrew),
// offset the years within the current millennium down to 1-999.
// These two constants bound that millennium (years 5000 and 6000).
// NOTE: the identifiers keep their historical "MILLENIUM" spelling.
static const int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000;
static const int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR = 6000;
216
// File-local mutex, statically initialized.
// NOTE(review): not referenced in the visible part of this file; presumably
// it guards lazily created state further down -- confirm at the use sites.
static UMutex LOCK = U_MUTEX_INITIALIZER;

// ICU RTTI boilerplate for SimpleDateFormat.
// NOTE(review): expected to expand to the class-ID member definitions --
// see the macro's definition to confirm.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
220
221//----------------------------------------------------------------------
222
223SimpleDateFormat::~SimpleDateFormat()
224{
225    delete fSymbols;
226    if (fNumberFormatters) {
227        uprv_free(fNumberFormatters);
228    }
229    if (fTimeZoneFormat) {
230        delete fTimeZoneFormat;
231    }
232
233    while (fOverrideList) {
234        NSOverride *cur = fOverrideList;
235        fOverrideList = cur->next;
236        delete cur->nf;
237        uprv_free(cur);
238    }
239}
240
241//----------------------------------------------------------------------
242
243SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
244  :   fLocale(Locale::getDefault()),
245      fSymbols(NULL),
246      fTimeZoneFormat(NULL),
247      fNumberFormatters(NULL),
248      fOverrideList(NULL),
249      fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
250{
251    construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
252    initializeDefaultCentury();
253}
254
255//----------------------------------------------------------------------
256
257SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
258                                   UErrorCode &status)
259:   fPattern(pattern),
260    fLocale(Locale::getDefault()),
261    fSymbols(NULL),
262    fTimeZoneFormat(NULL),
263    fNumberFormatters(NULL),
264    fOverrideList(NULL),
265    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
266{
267    fDateOverride.setToBogus();
268    fTimeOverride.setToBogus();
269    initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
270    initialize(fLocale, status);
271    initializeDefaultCentury();
272
273}
274//----------------------------------------------------------------------
275
276SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
277                                   const UnicodeString& override,
278                                   UErrorCode &status)
279:   fPattern(pattern),
280    fLocale(Locale::getDefault()),
281    fSymbols(NULL),
282    fTimeZoneFormat(NULL),
283    fNumberFormatters(NULL),
284    fOverrideList(NULL),
285    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
286{
287    fDateOverride.setTo(override);
288    fTimeOverride.setToBogus();
289    initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
290    initialize(fLocale, status);
291    initializeDefaultCentury();
292
293    processOverrideString(fLocale,override,kOvrStrBoth,status);
294
295}
296
297//----------------------------------------------------------------------
298
299SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
300                                   const Locale& locale,
301                                   UErrorCode& status)
302:   fPattern(pattern),
303    fLocale(locale),
304    fTimeZoneFormat(NULL),
305    fNumberFormatters(NULL),
306    fOverrideList(NULL),
307    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
308{
309
310    fDateOverride.setToBogus();
311    fTimeOverride.setToBogus();
312
313    initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
314    initialize(fLocale, status);
315    initializeDefaultCentury();
316}
317
318//----------------------------------------------------------------------
319
320SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
321                                   const UnicodeString& override,
322                                   const Locale& locale,
323                                   UErrorCode& status)
324:   fPattern(pattern),
325    fLocale(locale),
326    fTimeZoneFormat(NULL),
327    fNumberFormatters(NULL),
328    fOverrideList(NULL),
329    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
330{
331
332    fDateOverride.setTo(override);
333    fTimeOverride.setToBogus();
334
335    initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
336    initialize(fLocale, status);
337    initializeDefaultCentury();
338
339    processOverrideString(locale,override,kOvrStrBoth,status);
340
341}
342
343//----------------------------------------------------------------------
344
345SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
346                                   DateFormatSymbols* symbolsToAdopt,
347                                   UErrorCode& status)
348:   fPattern(pattern),
349    fLocale(Locale::getDefault()),
350    fSymbols(symbolsToAdopt),
351    fTimeZoneFormat(NULL),
352    fNumberFormatters(NULL),
353    fOverrideList(NULL),
354    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
355{
356
357    fDateOverride.setToBogus();
358    fTimeOverride.setToBogus();
359
360    initializeCalendar(NULL,fLocale,status);
361    initialize(fLocale, status);
362    initializeDefaultCentury();
363}
364
365//----------------------------------------------------------------------
366
367SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
368                                   const DateFormatSymbols& symbols,
369                                   UErrorCode& status)
370:   fPattern(pattern),
371    fLocale(Locale::getDefault()),
372    fSymbols(new DateFormatSymbols(symbols)),
373    fTimeZoneFormat(NULL),
374    fNumberFormatters(NULL),
375    fOverrideList(NULL),
376    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
377{
378
379    fDateOverride.setToBogus();
380    fTimeOverride.setToBogus();
381
382    initializeCalendar(NULL, fLocale, status);
383    initialize(fLocale, status);
384    initializeDefaultCentury();
385}
386
387//----------------------------------------------------------------------
388
389// Not for public consumption; used by DateFormat
390SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
391                                   EStyle dateStyle,
392                                   const Locale& locale,
393                                   UErrorCode& status)
394:   fLocale(locale),
395    fSymbols(NULL),
396    fTimeZoneFormat(NULL),
397    fNumberFormatters(NULL),
398    fOverrideList(NULL),
399    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
400{
401    construct(timeStyle, dateStyle, fLocale, status);
402    if(U_SUCCESS(status)) {
403      initializeDefaultCentury();
404    }
405}
406
407//----------------------------------------------------------------------
408
409/**
410 * Not for public consumption; used by DateFormat.  This constructor
411 * never fails.  If the resource data is not available, it uses the
412 * the last resort symbols.
413 */
414SimpleDateFormat::SimpleDateFormat(const Locale& locale,
415                                   UErrorCode& status)
416:   fPattern(gDefaultPattern),
417    fLocale(locale),
418    fSymbols(NULL),
419    fTimeZoneFormat(NULL),
420    fNumberFormatters(NULL),
421    fOverrideList(NULL),
422    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
423{
424    if (U_FAILURE(status)) return;
425    initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status);
426    if (U_FAILURE(status))
427    {
428        status = U_ZERO_ERROR;
429        delete fSymbols;
430        // This constructor doesn't fail; it uses last resort data
431        fSymbols = new DateFormatSymbols(status);
432        /* test for NULL */
433        if (fSymbols == 0) {
434            status = U_MEMORY_ALLOCATION_ERROR;
435            return;
436        }
437    }
438
439    fDateOverride.setToBogus();
440    fTimeOverride.setToBogus();
441
442    initialize(fLocale, status);
443    if(U_SUCCESS(status)) {
444      initializeDefaultCentury();
445    }
446}
447
448//----------------------------------------------------------------------
449
450SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
451:   DateFormat(other),
452    fLocale(other.fLocale),
453    fSymbols(NULL),
454    fTimeZoneFormat(NULL),
455    fNumberFormatters(NULL),
456    fOverrideList(NULL),
457    fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
458{
459    *this = other;
460}
461
462//----------------------------------------------------------------------
463
464SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
465{
466    if (this == &other) {
467        return *this;
468    }
469    DateFormat::operator=(other);
470
471    delete fSymbols;
472    fSymbols = NULL;
473
474    if (other.fSymbols)
475        fSymbols = new DateFormatSymbols(*other.fSymbols);
476
477    fDefaultCenturyStart         = other.fDefaultCenturyStart;
478    fDefaultCenturyStartYear     = other.fDefaultCenturyStartYear;
479    fHaveDefaultCentury          = other.fHaveDefaultCentury;
480
481    fPattern = other.fPattern;
482
483    // TimeZoneFormat in ICU4C only depends on a locale for now
484    if (fLocale != other.fLocale) {
485        delete fTimeZoneFormat;
486        fTimeZoneFormat = NULL; // forces lazy instantiation with the other locale
487        fLocale = other.fLocale;
488    }
489
490    fCapitalizationContext = other.fCapitalizationContext;
491
492    return *this;
493}
494
495//----------------------------------------------------------------------
496
497Format*
498SimpleDateFormat::clone() const
499{
500    return new SimpleDateFormat(*this);
501}
502
503//----------------------------------------------------------------------
504
505UBool
506SimpleDateFormat::operator==(const Format& other) const
507{
508    if (DateFormat::operator==(other)) {
509        // DateFormat::operator== guarantees following cast is safe
510        SimpleDateFormat* that = (SimpleDateFormat*)&other;
511        return (fPattern             == that->fPattern &&
512                fSymbols             != NULL && // Check for pathological object
513                that->fSymbols       != NULL && // Check for pathological object
514                *fSymbols            == *that->fSymbols &&
515                fHaveDefaultCentury  == that->fHaveDefaultCentury &&
516                fDefaultCenturyStart == that->fDefaultCenturyStart &&
517                fCapitalizationContext == that->fCapitalizationContext);
518    }
519    return FALSE;
520}
521
522//----------------------------------------------------------------------
523
/**
 * Loads the pattern (and any numbering-system override strings) for the
 * requested styles from the locale's calendar data, then finishes setup via
 * initialize().
 *
 * Steps, in order: create the calendar, open the "DateTimePatterns" table,
 * record the valid/actual locale IDs, create the symbols, pick the pattern
 * for (timeStyle, dateStyle), and call initialize().
 *
 * @param timeStyle  time style, or kNone for a date-only pattern.
 * @param dateStyle  date style, or kNone for a time-only pattern.
 * @param locale     the locale to load data for.
 * @param status     in/out error code.  Set to U_INVALID_FORMAT_ERROR when
 *                   the pattern table is too small, an entry cannot be
 *                   fetched or has an unexpected resource type, or both
 *                   styles are kNone; U_MEMORY_ALLOCATION_ERROR on
 *                   allocation failure.
 */
void SimpleDateFormat::construct(EStyle timeStyle,
                                 EStyle dateStyle,
                                 const Locale& locale,
                                 UErrorCode& status)
{
    // called by several constructors to load pattern data from the resources
    if (U_FAILURE(status)) return;

    // We will need the calendar to know what type of symbols to load.
    initializeCalendar(NULL, locale, status);
    if (U_FAILURE(status)) return;

    CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status);
    UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status);
    UResourceBundle *currentBundle;

    if (U_FAILURE(status)) return;

    // The table must contain at least the entries up to index kDateTime.
    if (ures_getSize(dateTimePatterns) <= kDateTime)
    {
        status = U_INVALID_FORMAT_ERROR;
        return;
    }

    setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status),
                 ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status));

    // create a symbols object from the locale
    initializeSymbols(locale,fCalendar, status);
    if (U_FAILURE(status)) return;
    /* test for NULL */
    if (fSymbols == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // resStr/ovrStr receive pointers returned by the ures_ string getters.
    const UChar *resStr,*ovrStr;
    int32_t resStrLen,ovrStrLen = 0;
    // No overrides until a pattern entry supplies one.
    fDateOverride.setToBogus();
    fTimeOverride.setToBogus();

    // if the pattern should include both date and time information, use the date/time
    // pattern string as a guide to tell us how to glue together the appropriate date
    // and time pattern strings.  The actual gluing-together is handled by a convenience
    // method on MessageFormat.
    if ((timeStyle != kNone) && (dateStyle != kNone))
    {
        Formattable timeDateArray[2];

        // use Formattable::adoptString() so that we can use fastCopyFrom()
        // instead of Formattable::setString()'s unaware, safe, deep string clone
        // see Jitterbug 2296

        // --- time pattern: a plain string, or an array of {pattern, override} ---
        currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)timeStyle, NULL, &status);
        if (U_FAILURE(status)) {
           status = U_INVALID_FORMAT_ERROR;
           return;
        }
        switch (ures_getType(currentBundle)) {
            case URES_STRING: {
               resStr = ures_getString(currentBundle, &resStrLen, &status);
               break;
            }
            case URES_ARRAY: {
               resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
               ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
               fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen);
               break;
            }
            default: {
               status = U_INVALID_FORMAT_ERROR;
               ures_close(currentBundle);
               return;
            }
        }
        ures_close(currentBundle);

        UnicodeString *tempus1 = new UnicodeString(TRUE, resStr, resStrLen);
        // NULL pointer check
        if (tempus1 == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        timeDateArray[0].adoptString(tempus1);

        // --- date pattern: same two possible resource shapes ---
        currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)dateStyle, NULL, &status);
        if (U_FAILURE(status)) {
           status = U_INVALID_FORMAT_ERROR;
           return;
        }
        switch (ures_getType(currentBundle)) {
            case URES_STRING: {
               resStr = ures_getString(currentBundle, &resStrLen, &status);
               break;
            }
            case URES_ARRAY: {
               resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
               ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
               fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
               break;
            }
            default: {
               status = U_INVALID_FORMAT_ERROR;
               ures_close(currentBundle);
               return;
            }
        }
        ures_close(currentBundle);

        UnicodeString *tempus2 = new UnicodeString(TRUE, resStr, resStrLen);
        // Null pointer check
        if (tempus2 == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        timeDateArray[1].adoptString(tempus2);

        // Default to the single glue pattern at kDateTime; if the table is
        // large enough to hold per-date-style glue patterns, use the one
        // matching dateStyle instead.
        int32_t glueIndex = kDateTime;
        int32_t patternsSize = ures_getSize(dateTimePatterns);
        if (patternsSize >= (kDateTimeOffset + kShort + 1)) {
            // Get proper date time format
            glueIndex = (int32_t)(kDateTimeOffset + (dateStyle - kDateOffset));
        }

        // {0} is the time pattern, {1} the date pattern; the result goes to fPattern.
        resStr = ures_getStringByIndex(dateTimePatterns, glueIndex, &resStrLen, &status);
        MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status);
    }
    // if the pattern includes just time data or just date data, load the appropriate
    // pattern string from the resources
    // setTo() - see DateFormatSymbols::assignArray comments
    else if (timeStyle != kNone) {
        currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)timeStyle, NULL, &status);
        if (U_FAILURE(status)) {
           status = U_INVALID_FORMAT_ERROR;
           return;
        }
        switch (ures_getType(currentBundle)) {
            case URES_STRING: {
               resStr = ures_getString(currentBundle, &resStrLen, &status);
               break;
            }
            case URES_ARRAY: {
               resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
               ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
               // NOTE(review): this time-only branch stores the override in
               // fDateOverride, whereas the combined branch above uses
               // fTimeOverride for the time entry -- confirm this is intended.
               fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
               break;
            }
            default: {
               status = U_INVALID_FORMAT_ERROR;
                ures_close(currentBundle);
               return;
            }
        }
        fPattern.setTo(TRUE, resStr, resStrLen);
        ures_close(currentBundle);
    }
    else if (dateStyle != kNone) {
        currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)dateStyle, NULL, &status);
        if (U_FAILURE(status)) {
           status = U_INVALID_FORMAT_ERROR;
           return;
        }
        switch (ures_getType(currentBundle)) {
            case URES_STRING: {
               resStr = ures_getString(currentBundle, &resStrLen, &status);
               break;
            }
            case URES_ARRAY: {
               resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
               ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
               fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
               break;
            }
            default: {
               status = U_INVALID_FORMAT_ERROR;
               ures_close(currentBundle);
               return;
            }
        }
        fPattern.setTo(TRUE, resStr, resStrLen);
        ures_close(currentBundle);
    }

    // and if it includes _neither_, that's an error
    else
        status = U_INVALID_FORMAT_ERROR;

    // finally, finish initializing by creating the NumberFormat (the Calendar
    // was already created above); initialize() is a no-op if status has failed
    initialize(locale, status);
}
714
715//----------------------------------------------------------------------
716
717Calendar*
718SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
719{
720    if(!U_FAILURE(status)) {
721        fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status);
722    }
723    if (U_SUCCESS(status) && fCalendar == NULL) {
724        status = U_MEMORY_ALLOCATION_ERROR;
725    }
726    return fCalendar;
727}
728
729void
730SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status)
731{
732  if(U_FAILURE(status)) {
733    fSymbols = NULL;
734  } else {
735    // pass in calendar type - use NULL (default) if no calendar set (or err).
736    fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status);
737    // Null pointer check
738    if (fSymbols == NULL) {
739        status = U_MEMORY_ALLOCATION_ERROR;
740        return;
741    }
742  }
743}
744
745void
746SimpleDateFormat::initialize(const Locale& locale,
747                             UErrorCode& status)
748{
749    if (U_FAILURE(status)) return;
750
751    // We don't need to check that the row count is >= 1, since all 2d arrays have at
752    // least one row
753    fNumberFormat = NumberFormat::createInstance(locale, status);
754    if (fNumberFormat != NULL && U_SUCCESS(status))
755    {
756        // no matter what the locale's default number format looked like, we want
757        // to modify it so that it doesn't use thousands separators, doesn't always
758        // show the decimal point, and recognizes integers only when parsing
759
760        fNumberFormat->setGroupingUsed(FALSE);
761        DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
762        if (decfmt != NULL) {
763            decfmt->setDecimalSeparatorAlwaysShown(FALSE);
764        }
765        fNumberFormat->setParseIntegerOnly(TRUE);
766        fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
767
768        //fNumberFormat->setLenient(TRUE); // Java uses a custom DateNumberFormat to format/parse
769
770        initNumberFormatters(locale,status);
771
772    }
773    else if (U_SUCCESS(status))
774    {
775        status = U_MISSING_RESOURCE_ERROR;
776    }
777}
778
779/* Initialize the fields we use to disambiguate ambiguous years. Separate
780 * so we can call it from readObject().
781 */
782void SimpleDateFormat::initializeDefaultCentury()
783{
784  if(fCalendar) {
785    fHaveDefaultCentury = fCalendar->haveDefaultCentury();
786    if(fHaveDefaultCentury) {
787      fDefaultCenturyStart = fCalendar->defaultCenturyStart();
788      fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
789    } else {
790      fDefaultCenturyStart = DBL_MIN;
791      fDefaultCenturyStartYear = -1;
792    }
793  }
794}
795
796/* Define one-century window into which to disambiguate dates using
797 * two-digit years. Make public in JDK 1.2.
798 */
799void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
800{
801    if(U_FAILURE(status)) {
802        return;
803    }
804    if(!fCalendar) {
805      status = U_ILLEGAL_ARGUMENT_ERROR;
806      return;
807    }
808
809    fCalendar->setTime(startDate, status);
810    if(U_SUCCESS(status)) {
811        fHaveDefaultCentury = TRUE;
812        fDefaultCenturyStart = startDate;
813        fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
814    }
815}
816
817//----------------------------------------------------------------------
818
819UnicodeString&
820SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
821{
822  UErrorCode status = U_ZERO_ERROR;
823  FieldPositionOnlyHandler handler(pos);
824  return _format(cal, appendTo, handler, status);
825}
826
827//----------------------------------------------------------------------
828
829UnicodeString&
830SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo,
831                         FieldPositionIterator* posIter, UErrorCode& status) const
832{
833  FieldPositionIteratorHandler handler(posIter, status);
834  return _format(cal, appendTo, handler, status);
835}
836
837//----------------------------------------------------------------------
838
839UnicodeString&
840SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo,
841                            FieldPositionHandler& handler, UErrorCode& status) const
842{
843    if ( U_FAILURE(status) ) {
844       return appendTo;
845    }
846    Calendar* workCal = &cal;
847    Calendar* calClone = NULL;
848    if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
849        // Different calendar type
850        // We use the time and time zone from the input calendar, but
851        // do not use the input calendar for field calculation.
852        calClone = fCalendar->clone();
853        if (calClone != NULL) {
854            UDate t = cal.getTime(status);
855            calClone->setTime(t, status);
856            calClone->setTimeZone(cal.getTimeZone());
857            workCal = calClone;
858        } else {
859            status = U_MEMORY_ALLOCATION_ERROR;
860            return appendTo;
861        }
862    }
863
864    UBool inQuote = FALSE;
865    UChar prevCh = 0;
866    int32_t count = 0;
867    int32_t fieldNum = 0;
868
869    // loop through the pattern string character by character
870    for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
871        UChar ch = fPattern[i];
872
873        // Use subFormat() to format a repeated pattern character
874        // when a different pattern or non-pattern character is seen
875        if (ch != prevCh && count > 0) {
876            subFormat(appendTo, prevCh, count, fCapitalizationContext, fieldNum++, handler, *workCal, status);
877            count = 0;
878        }
879        if (ch == QUOTE) {
880            // Consecutive single quotes are a single quote literal,
881            // either outside of quotes or between quotes
882            if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
883                appendTo += (UChar)QUOTE;
884                ++i;
885            } else {
886                inQuote = ! inQuote;
887            }
888        }
889        else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
890                    || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
891            // ch is a date-time pattern character to be interpreted
892            // by subFormat(); count the number of times it is repeated
893            prevCh = ch;
894            ++count;
895        }
896        else {
897            // Append quoted characters and unquoted non-pattern characters
898            appendTo += ch;
899        }
900    }
901
902    // Format the last item in the pattern, if any
903    if (count > 0) {
904        subFormat(appendTo, prevCh, count, fCapitalizationContext, fieldNum++, handler, *workCal, status);
905    }
906
907    if (calClone != NULL) {
908        delete calClone;
909    }
910
911    return appendTo;
912}
913
914//----------------------------------------------------------------------
915
916/* Map calendar field into calendar field level.
917 * the larger the level, the smaller the field unit.
918 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
919 * UCAL_MONTH level is 20.
920 * NOTE: if new fields adds in, the table needs to update.
921 */
922const int32_t
923SimpleDateFormat::fgCalendarFieldToLevel[] =
924{
925    /*GyM*/ 0, 10, 20,
926    /*wW*/ 20, 30,
927    /*dDEF*/ 30, 20, 30, 30,
928    /*ahHm*/ 40, 50, 50, 60,
929    /*sS..*/ 70, 80,
930    /*z?Y*/ 0, 0, 10,
931    /*eug*/ 30, 10, 0,
932    /*A*/ 40
933};
934
935
936/* Map calendar field LETTER into calendar field level.
937 * the larger the level, the smaller the field unit.
938 * NOTE: if new fields adds in, the table needs to update.
939 */
940const int32_t
941SimpleDateFormat::fgPatternCharToLevel[] = {
942    //       A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
943        -1, 40, -1, -1, 20, 30, 30,  0, 50, -1, -1, 50, 20, 20, -1,  0,
944    //   P   Q   R   S   T   U   V   W   X   Y   Z
945        -1, 20, -1, 80, -1, 10,  0, 30,  0, 10,  0, -1, -1, -1, -1, -1,
946    //       a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
947        -1, 40, -1, 30, 30, 30, -1,  0, 50, -1, -1, 50, -1, 60, -1, -1,
948    //   p   q   r   s   t   u   v   w   x   y   z
949        -1, 20, -1, 70, -1, 10,  0, 20,  0, 10,  0, -1, -1, -1, -1, -1
950};
951
952
953// Map index into pattern character string to Calendar field number.
954const UCalendarDateFields
955SimpleDateFormat::fgPatternIndexToCalendarField[] =
956{
957    /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
958    /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
959    /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
960    /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
961    /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
962    /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
963    /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
964    /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
965    /*v*/   UCAL_ZONE_OFFSET,
966    /*c*/   UCAL_DOW_LOCAL,
967    /*L*/   UCAL_MONTH,
968    /*Q*/   UCAL_MONTH,
969    /*q*/   UCAL_MONTH,
970    /*V*/   UCAL_ZONE_OFFSET,
971    /*U*/   UCAL_YEAR,
972    /*O*/   UCAL_ZONE_OFFSET,
973    /*Xx*/  UCAL_ZONE_OFFSET, UCAL_ZONE_OFFSET,
974};
975
976// Map index into pattern character string to DateFormat field number
977const UDateFormatField
978SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
979    /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
980    /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
981    /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
982    /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
983    /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
984    /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
985    /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
986    /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
987    /*v*/   UDAT_TIMEZONE_GENERIC_FIELD,
988    /*c*/   UDAT_STANDALONE_DAY_FIELD,
989    /*L*/   UDAT_STANDALONE_MONTH_FIELD,
990    /*Q*/   UDAT_QUARTER_FIELD,
991    /*q*/   UDAT_STANDALONE_QUARTER_FIELD,
992    /*V*/   UDAT_TIMEZONE_SPECIAL_FIELD,
993    /*U*/   UDAT_YEAR_NAME_FIELD,
994    /*O*/   UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD,
995    /*Xx*/  UDAT_TIMEZONE_ISO_FIELD, UDAT_TIMEZONE_ISO_LOCAL_FIELD,
996};
997
998//----------------------------------------------------------------------
999
1000/**
1001 * Append symbols[value] to dst.  Make sure the array index is not out
1002 * of bounds.
1003 */
1004static inline void
1005_appendSymbol(UnicodeString& dst,
1006              int32_t value,
1007              const UnicodeString* symbols,
1008              int32_t symbolsCount) {
1009    U_ASSERT(0 <= value && value < symbolsCount);
1010    if (0 <= value && value < symbolsCount) {
1011        dst += symbols[value];
1012    }
1013}
1014
1015static inline void
1016_appendSymbolWithMonthPattern(UnicodeString& dst, int32_t value, const UnicodeString* symbols, int32_t symbolsCount,
1017              const UnicodeString* monthPattern, UErrorCode& status) {
1018    U_ASSERT(0 <= value && value < symbolsCount);
1019    if (0 <= value && value < symbolsCount) {
1020        if (monthPattern == NULL) {
1021            dst += symbols[value];
1022        } else {
1023            Formattable monthName((const UnicodeString&)(symbols[value]));
1024            MessageFormat::format(*monthPattern, &monthName, 1, dst, status);
1025        }
1026    }
1027}
1028
1029//----------------------------------------------------------------------
1030void
1031SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) {
1032    if (U_FAILURE(status)) {
1033        return;
1034    }
1035    if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) {
1036        return;
1037    }
1038    umtx_lock(&LOCK);
1039    if (fNumberFormatters == NULL) {
1040        fNumberFormatters = (NumberFormat**)uprv_malloc(UDAT_FIELD_COUNT * sizeof(NumberFormat*));
1041        if (fNumberFormatters) {
1042            for (int32_t i = 0; i < UDAT_FIELD_COUNT; i++) {
1043                fNumberFormatters[i] = fNumberFormat;
1044            }
1045        } else {
1046            status = U_MEMORY_ALLOCATION_ERROR;
1047        }
1048    }
1049    umtx_unlock(&LOCK);
1050
1051    processOverrideString(locale,fDateOverride,kOvrStrDate,status);
1052    processOverrideString(locale,fTimeOverride,kOvrStrTime,status);
1053
1054}
1055
1056void
1057SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) {
1058    if (str.isBogus()) {
1059        return;
1060    }
1061    int32_t start = 0;
1062    int32_t len;
1063    UnicodeString nsName;
1064    UnicodeString ovrField;
1065    UBool moreToProcess = TRUE;
1066
1067    while (moreToProcess) {
1068        int32_t delimiterPosition = str.indexOf((UChar)ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE,start);
1069        if (delimiterPosition == -1) {
1070            moreToProcess = FALSE;
1071            len = str.length() - start;
1072        } else {
1073            len = delimiterPosition - start;
1074        }
1075        UnicodeString currentString(str,start,len);
1076        int32_t equalSignPosition = currentString.indexOf((UChar)ULOC_KEYWORD_ASSIGN_UNICODE,0);
1077        if (equalSignPosition == -1) { // Simple override string such as "hebrew"
1078            nsName.setTo(currentString);
1079            ovrField.setToBogus();
1080        } else { // Field specific override string such as "y=hebrew"
1081            nsName.setTo(currentString,equalSignPosition+1);
1082            ovrField.setTo(currentString,0,1); // We just need the first character.
1083        }
1084
1085        int32_t nsNameHash = nsName.hashCode();
1086        // See if the numbering system is in the override list, if not, then add it.
1087        NSOverride *cur = fOverrideList;
1088        NumberFormat *nf = NULL;
1089        UBool found = FALSE;
1090        while ( cur && !found ) {
1091            if ( cur->hash == nsNameHash ) {
1092                nf = cur->nf;
1093                found = TRUE;
1094            }
1095            cur = cur->next;
1096        }
1097
1098        if (!found) {
1099           cur = (NSOverride *)uprv_malloc(sizeof(NSOverride));
1100           if (cur) {
1101               char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1102               uprv_strcpy(kw,"numbers=");
1103               nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV);
1104
1105               Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw);
1106               nf = NumberFormat::createInstance(ovrLoc,status);
1107
1108               // no matter what the locale's default number format looked like, we want
1109               // to modify it so that it doesn't use thousands separators, doesn't always
1110               // show the decimal point, and recognizes integers only when parsing
1111
1112               if (U_SUCCESS(status)) {
1113                   nf->setGroupingUsed(FALSE);
1114                   DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(nf);
1115                   if (decfmt != NULL) {
1116                       decfmt->setDecimalSeparatorAlwaysShown(FALSE);
1117                   }
1118                   nf->setParseIntegerOnly(TRUE);
1119                   nf->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
1120
1121                   cur->nf = nf;
1122                   cur->hash = nsNameHash;
1123                   cur->next = fOverrideList;
1124                   fOverrideList = cur;
1125               }
1126               else {
1127                   // clean up before returning
1128                   if (cur != NULL) {
1129                       uprv_free(cur);
1130                   }
1131                  return;
1132               }
1133
1134           } else {
1135               status = U_MEMORY_ALLOCATION_ERROR;
1136               return;
1137           }
1138        }
1139
1140        // Now that we have an appropriate number formatter, fill in the appropriate spaces in the
1141        // number formatters table.
1142
1143        if (ovrField.isBogus()) {
1144            switch (type) {
1145                case kOvrStrDate:
1146                case kOvrStrBoth: {
1147                    for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) {
1148                        fNumberFormatters[kDateFields[i]] = nf;
1149                    }
1150                    if (type==kOvrStrDate) {
1151                        break;
1152                    }
1153                }
1154                case kOvrStrTime : {
1155                    for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) {
1156                        fNumberFormatters[kTimeFields[i]] = nf;
1157                    }
1158                    break;
1159                }
1160            }
1161        } else {
1162           // if the pattern character is unrecognized, signal an error and bail out
1163           UDateFormatField patternCharIndex =
1164              DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0));
1165           if (patternCharIndex == UDAT_FIELD_COUNT) {
1166               status = U_INVALID_FORMAT_ERROR;
1167               return;
1168           }
1169
1170           // Set the number formatter in the table
1171           fNumberFormatters[patternCharIndex] = nf;
1172        }
1173
1174        start = delimiterPosition + 1;
1175    }
1176}
1177
1178//---------------------------------------------------------------------
1179void
1180SimpleDateFormat::subFormat(UnicodeString &appendTo,
1181                            UChar ch,
1182                            int32_t count,
1183                            UDisplayContext capitalizationContext,
1184                            int32_t fieldNum,
1185                            FieldPositionHandler& handler,
1186                            Calendar& cal,
1187                            UErrorCode& status) const
1188{
1189    if (U_FAILURE(status)) {
1190        return;
1191    }
1192
1193    // this function gets called by format() to produce the appropriate substitution
1194    // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1195
1196    UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
1197    const int32_t maxIntCount = 10;
1198    int32_t beginOffset = appendTo.length();
1199    NumberFormat *currentNumberFormat;
1200    DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther;
1201
1202    UBool isHebrewCalendar = (uprv_strcmp(cal.getType(),"hebrew") == 0);
1203    UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0);
1204
1205    // if the pattern character is unrecognized, signal an error and dump out
1206    if (patternCharIndex == UDAT_FIELD_COUNT)
1207    {
1208        if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
1209            status = U_INVALID_FORMAT_ERROR;
1210        }
1211        return;
1212    }
1213
1214    UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1215    int32_t value = cal.get(field, status);
1216    if (U_FAILURE(status)) {
1217        return;
1218    }
1219
1220    currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
1221    UnicodeString hebr("hebr", 4, US_INV);
1222
1223    switch (patternCharIndex) {
1224
1225    // for any "G" symbol, write out the appropriate era string
1226    // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
1227    case UDAT_ERA_FIELD:
1228        if (isChineseCalendar) {
1229            zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J
1230        } else {
1231            if (count == 5) {
1232                _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1233                capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow;
1234            } else if (count == 4) {
1235                _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1236                capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide;
1237            } else {
1238                _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1239                capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev;
1240            }
1241        }
1242        break;
1243
1244     case UDAT_YEAR_NAME_FIELD:
1245        if (fSymbols->fShortYearNames != NULL && value <= fSymbols->fShortYearNamesCount) {
1246            // the Calendar YEAR field runs 1 through 60 for cyclic years
1247            _appendSymbol(appendTo, value - 1, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount);
1248            break;
1249        }
1250        // else fall through to numeric year handling, do not break here
1251
1252   // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1253    // NEW: UTS#35:
1254//Year         y     yy     yyy     yyyy     yyyyy
1255//AD 1         1     01     001     0001     00001
1256//AD 12       12     12     012     0012     00012
1257//AD 123     123     23     123     0123     00123
1258//AD 1234   1234     34    1234     1234     01234
1259//AD 12345 12345     45   12345    12345     12345
1260    case UDAT_YEAR_FIELD:
1261    case UDAT_YEAR_WOY_FIELD:
1262        if (fDateOverride.compare(hebr)==0 && value>HEBREW_CAL_CUR_MILLENIUM_START_YEAR && value<HEBREW_CAL_CUR_MILLENIUM_END_YEAR) {
1263            value-=HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
1264        }
1265        if(count == 2)
1266            zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2);
1267        else
1268            zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount);
1269        break;
1270
1271    // for "MMMM"/"LLLL", write out the whole month name, for "MMM"/"LLL", write out the month
1272    // abbreviation, for "M"/"L" or "MM"/"LL", write out the month as a number with the
1273    // appropriate number of digits
1274    // for "MMMMM"/"LLLLL", use the narrow form
1275    case UDAT_MONTH_FIELD:
1276    case UDAT_STANDALONE_MONTH_FIELD:
1277        if ( isHebrewCalendar ) {
1278           HebrewCalendar *hc = (HebrewCalendar*)&cal;
1279           if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 )
1280               value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar.
1281           if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6 && count < 3 )
1282               value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7.
1283        }
1284        {
1285            int32_t isLeapMonth = (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount)?
1286                        cal.get(UCAL_IS_LEAP_MONTH, status): 0;
1287            // should consolidate the next section by using arrays of pointers & counts for the right symbols...
1288            if (count == 5) {
1289                if (patternCharIndex == UDAT_MONTH_FIELD) {
1290                    _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fNarrowMonths, fSymbols->fNarrowMonthsCount,
1291                            (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatNarrow]): NULL, status);
1292                } else {
1293                    _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneNarrowMonths, fSymbols->fStandaloneNarrowMonthsCount,
1294                            (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneNarrow]): NULL, status);
1295                }
1296                capContextUsageType = DateFormatSymbols::kCapContextUsageMonthNarrow;
1297            } else if (count == 4) {
1298                if (patternCharIndex == UDAT_MONTH_FIELD) {
1299                    _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fMonths, fSymbols->fMonthsCount,
1300                            (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]): NULL, status);
1301                    capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1302                } else {
1303                    _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount,
1304                            (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]): NULL, status);
1305                    capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1306                }
1307            } else if (count == 3) {
1308                if (patternCharIndex == UDAT_MONTH_FIELD) {
1309                    _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fShortMonths, fSymbols->fShortMonthsCount,
1310                            (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]): NULL, status);
1311                    capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1312                } else {
1313                    _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount,
1314                            (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]): NULL, status);
1315                    capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1316                }
1317            } else {
1318                UnicodeString monthNumber;
1319                zeroPaddingNumber(currentNumberFormat,monthNumber, value + 1, count, maxIntCount);
1320                _appendSymbolWithMonthPattern(appendTo, 0, &monthNumber, 1,
1321                        (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric]): NULL, status);
1322            }
1323        }
1324        break;
1325
1326    // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1327    case UDAT_HOUR_OF_DAY1_FIELD:
1328        if (value == 0)
1329            zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1330        else
1331            zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1332        break;
1333
1334    case UDAT_FRACTIONAL_SECOND_FIELD:
1335        // Fractional seconds left-justify
1336        {
1337            currentNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count);
1338            currentNumberFormat->setMaximumIntegerDigits(maxIntCount);
1339            if (count == 1) {
1340                value /= 100;
1341            } else if (count == 2) {
1342                value /= 10;
1343            }
1344            FieldPosition p(0);
1345            currentNumberFormat->format(value, appendTo, p);
1346            if (count > 3) {
1347                currentNumberFormat->setMinimumIntegerDigits(count - 3);
1348                currentNumberFormat->format((int32_t)0, appendTo, p);
1349            }
1350        }
1351        break;
1352
1353    // for "ee" or "e", use local numeric day-of-the-week
1354    // for "EEEEEE" or "eeeeee", write out the short day-of-the-week name
1355    // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1356    // for "EEEE" or "eeee", write out the wide day-of-the-week name
1357    // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1358    case UDAT_DOW_LOCAL_FIELD:
1359        if ( count < 3 ) {
1360            zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1361            break;
1362        }
1363        // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1364        // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1365        value = cal.get(UCAL_DAY_OF_WEEK, status);
1366        if (U_FAILURE(status)) {
1367            return;
1368        }
1369        // fall through, do not break here
1370    case UDAT_DAY_OF_WEEK_FIELD:
1371        if (count == 5) {
1372            _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1373                          fSymbols->fNarrowWeekdaysCount);
1374            capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1375        } else if (count == 4) {
1376            _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1377                          fSymbols->fWeekdaysCount);
1378            capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1379        } else if (count == 6) {
1380            _appendSymbol(appendTo, value, fSymbols->fShorterWeekdays,
1381                          fSymbols->fShorterWeekdaysCount);
1382            capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1383        } else {
1384            _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1385                          fSymbols->fShortWeekdaysCount);
1386            capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1387        }
1388        break;
1389
1390    // for "ccc", write out the abbreviated day-of-the-week name
1391    // for "cccc", write out the wide day-of-the-week name
1392    // for "ccccc", use the narrow day-of-the-week name
    // for "cccccc", use the short day-of-the-week name
1394    case UDAT_STANDALONE_DAY_FIELD:
1395        if ( count < 3 ) {
1396            zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount);
1397            break;
1398        }
1399        // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1400        // we want standard day-of-week, so first fix value.
1401        value = cal.get(UCAL_DAY_OF_WEEK, status);
1402        if (U_FAILURE(status)) {
1403            return;
1404        }
1405        if (count == 5) {
1406            _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1407                          fSymbols->fStandaloneNarrowWeekdaysCount);
1408            capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1409        } else if (count == 4) {
1410            _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1411                          fSymbols->fStandaloneWeekdaysCount);
1412            capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1413        } else if (count == 6) {
1414            _appendSymbol(appendTo, value, fSymbols->fStandaloneShorterWeekdays,
1415                          fSymbols->fStandaloneShorterWeekdaysCount);
1416            capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1417        } else { // count == 3
1418            _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1419                          fSymbols->fStandaloneShortWeekdaysCount);
1420            capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1421        }
1422        break;
1423
    // for an "a" symbol, write out the whole AM/PM string
1425    case UDAT_AM_PM_FIELD:
1426        _appendSymbol(appendTo, value, fSymbols->fAmPms,
1427                      fSymbols->fAmPmsCount);
1428        break;
1429
1430    // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1431    // as "12"
1432    case UDAT_HOUR1_FIELD:
1433        if (value == 0)
1434            zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1435        else
1436            zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1437        break;
1438
1439    case UDAT_TIMEZONE_FIELD: // 'z'
1440    case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
1441    case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
1442    case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
1443    case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
1444    case UDAT_TIMEZONE_ISO_FIELD: // 'X'
1445    case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
1446        {
1447            UnicodeString zoneString;
1448            const TimeZone& tz = cal.getTimeZone();
1449            UDate date = cal.getTime(status);
1450            if (U_SUCCESS(status)) {
1451                if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1452                    if (count < 4) {
1453                        // "z", "zz", "zzz"
1454                        tzFormat()->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString);
1455                        capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1456                    } else {
1457                        // "zzzz" or longer
1458                        tzFormat()->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString);
1459                        capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1460                    }
1461                }
1462                else if (patternCharIndex == UDAT_TIMEZONE_RFC_FIELD) {
1463                    if (count < 4) {
                        // "Z", "ZZ", "ZZZ"
1465                        tzFormat()->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1466                    } else if (count == 5) {
1467                        // "ZZZZZ"
1468                        tzFormat()->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1469                    } else {
                        // "ZZZZ" (and any count above 5)
1471                        tzFormat()->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1472                    }
1473                }
1474                else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1475                    if (count == 1) {
1476                        // "v"
1477                        tzFormat()->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString);
1478                        capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1479                    } else if (count == 4) {
1480                        // "vvvv"
1481                        tzFormat()->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString);
1482                        capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1483                    }
1484                }
1485                else if (patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD) {
1486                    if (count == 1) {
1487                        // "V"
1488                        tzFormat()->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString);
1489                    } else if (count == 2) {
1490                        // "VV"
1491                        tzFormat()->format(UTZFMT_STYLE_ZONE_ID, tz, date, zoneString);
1492                    } else if (count == 3) {
1493                        // "VVV"
1494                        tzFormat()->format(UTZFMT_STYLE_EXEMPLAR_LOCATION, tz, date, zoneString);
1495                    } else if (count == 4) {
1496                        // "VVVV"
1497                        tzFormat()->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString);
1498                        capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong;
1499                    }
1500                }
1501                else if (patternCharIndex == UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD) {
1502                    if (count == 1) {
1503                        // "O"
1504                        tzFormat()->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString);
1505                    } else if (count == 4) {
1506                        // "OOOO"
1507                        tzFormat()->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1508                    }
1509                }
1510                else if (patternCharIndex == UDAT_TIMEZONE_ISO_FIELD) {
1511                    if (count == 1) {
1512                        // "X"
1513                        tzFormat()->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString);
1514                    } else if (count == 2) {
1515                        // "XX"
1516                        tzFormat()->format(UTZFMT_STYLE_ISO_BASIC_FIXED, tz, date, zoneString);
1517                    } else if (count == 3) {
1518                        // "XXX"
1519                        tzFormat()->format(UTZFMT_STYLE_ISO_EXTENDED_FIXED, tz, date, zoneString);
1520                    } else if (count == 4) {
1521                        // "XXXX"
1522                        tzFormat()->format(UTZFMT_STYLE_ISO_BASIC_FULL, tz, date, zoneString);
1523                    } else if (count == 5) {
1524                        // "XXXXX"
1525                        tzFormat()->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1526                    }
1527                }
1528                else if (patternCharIndex == UDAT_TIMEZONE_ISO_LOCAL_FIELD) {
1529                    if (count == 1) {
1530                        // "x"
1531                        tzFormat()->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString);
1532                    } else if (count == 2) {
1533                        // "xx"
1534                        tzFormat()->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, tz, date, zoneString);
1535                    } else if (count == 3) {
1536                        // "xxx"
1537                        tzFormat()->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, tz, date, zoneString);
1538                    } else if (count == 4) {
1539                        // "xxxx"
1540                        tzFormat()->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1541                    } else if (count == 5) {
1542                        // "xxxxx"
1543                        tzFormat()->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString);
1544                    }
1545                }
1546                else {
1547                    U_ASSERT(FALSE);
1548                }
1549            }
1550            appendTo += zoneString;
1551        }
1552        break;
1553
1554    case UDAT_QUARTER_FIELD:
1555        if (count >= 4)
1556            _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1557                          fSymbols->fQuartersCount);
1558        else if (count == 3)
1559            _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1560                          fSymbols->fShortQuartersCount);
1561        else
1562            zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1563        break;
1564
1565    case UDAT_STANDALONE_QUARTER_FIELD:
1566        if (count >= 4)
1567            _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1568                          fSymbols->fStandaloneQuartersCount);
1569        else if (count == 3)
1570            _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1571                          fSymbols->fStandaloneShortQuartersCount);
1572        else
1573            zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1574        break;
1575
1576
1577    // all of the other pattern symbols can be formatted as simple numbers with
1578    // appropriate zero padding
1579    default:
1580        zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1581        break;
1582    }
1583#if !UCONFIG_NO_BREAK_ITERATION
1584    if (fieldNum == 0) {
1585        // first field, check to see whether we need to titlecase it
1586        UBool titlecase = FALSE;
1587        switch (capitalizationContext) {
1588            case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE:
1589                titlecase = TRUE;
1590                break;
1591            case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU:
1592                titlecase = fSymbols->fCapitalization[capContextUsageType][0];
1593                break;
1594            case UDISPCTX_CAPITALIZATION_FOR_STANDALONE:
1595                titlecase = fSymbols->fCapitalization[capContextUsageType][1];
1596                break;
1597            default:
1598                // titlecase = FALSE;
1599                break;
1600        }
1601        if (titlecase) {
1602            UnicodeString firstField(appendTo, beginOffset);
1603            firstField.toTitle(NULL, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1604            appendTo.replaceBetween(beginOffset, appendTo.length(), firstField);
1605        }
1606    }
1607#endif
1608
1609    handler.addAttribute(fgPatternIndexToDateFormatField[patternCharIndex], beginOffset, appendTo.length());
1610}
1611
1612//----------------------------------------------------------------------
1613
1614NumberFormat *
1615SimpleDateFormat::getNumberFormatByIndex(UDateFormatField index) const {
1616    if (fNumberFormatters != NULL) {
1617        return fNumberFormatters[index];
1618    } else {
1619        return fNumberFormat;
1620    }
1621}
1622
1623//----------------------------------------------------------------------
1624void
1625SimpleDateFormat::zeroPaddingNumber(NumberFormat *currentNumberFormat,UnicodeString &appendTo,
1626                                    int32_t value, int32_t minDigits, int32_t maxDigits) const
1627{
1628    if (currentNumberFormat!=NULL) {
1629        FieldPosition pos(0);
1630
1631        currentNumberFormat->setMinimumIntegerDigits(minDigits);
1632        currentNumberFormat->setMaximumIntegerDigits(maxDigits);
1633        currentNumberFormat->format(value, appendTo, pos);  // 3rd arg is there to speed up processing
1634    }
1635}
1636
1637//----------------------------------------------------------------------
1638
1639/**
1640 * Return true if the given format character, occuring count
1641 * times, represents a numeric field.
1642 */
1643UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) {
1644    return DateFormatSymbols::isNumericPatternChar(formatChar, count);
1645}
1646
1647UBool
1648SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) {
1649    if (patternOffset >= pattern.length()) {
1650        // not at any field
1651        return FALSE;
1652    }
1653    UChar ch = pattern.charAt(patternOffset);
1654    UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
1655    if (f == UDAT_FIELD_COUNT) {
1656        // not at any field
1657        return FALSE;
1658    }
1659    int32_t i = patternOffset;
1660    while (pattern.charAt(++i) == ch) {}
1661    return DateFormatSymbols::isNumericField(f, i - patternOffset);
1662}
1663
1664UBool
1665SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) {
1666    if (patternOffset <= 0) {
1667        // not after any field
1668        return FALSE;
1669    }
1670    UChar ch = pattern.charAt(--patternOffset);
1671    UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
1672    if (f == UDAT_FIELD_COUNT) {
1673        // not after any field
1674        return FALSE;
1675    }
1676    int32_t i = patternOffset;
1677    while (pattern.charAt(--i) == ch) {}
1678    return !DateFormatSymbols::isNumericField(f, patternOffset - i);
1679}
1680
1681void
1682SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
1683{
1684    UErrorCode status = U_ZERO_ERROR;
1685    int32_t pos = parsePos.getIndex();
1686    int32_t start = pos;
1687
1688    UBool ambiguousYear[] = { FALSE };
1689    int32_t saveHebrewMonth = -1;
1690    int32_t count = 0;
1691
1692    UBool lenient = isLenient();
1693
1694    // hack, reset tztype, cast away const
1695    ((SimpleDateFormat*)this)->tztype = UTZFMT_TIME_TYPE_UNKNOWN;
1696
1697    // For parsing abutting numeric fields. 'abutPat' is the
1698    // offset into 'pattern' of the first of 2 or more abutting
1699    // numeric fields.  'abutStart' is the offset into 'text'
1700    // where parsing the fields begins. 'abutPass' starts off as 0
1701    // and increments each time we try to parse the fields.
1702    int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
1703    int32_t abutStart = 0;
1704    int32_t abutPass = 0;
1705    UBool inQuote = FALSE;
1706
1707    MessageFormat * numericLeapMonthFormatter = NULL;
1708
1709    Calendar* calClone = NULL;
1710    Calendar *workCal = &cal;
1711    if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
1712        // Different calendar type
1713        // We use the time/zone from the input calendar, but
1714        // do not use the input calendar for field calculation.
1715        calClone = fCalendar->clone();
1716        if (calClone != NULL) {
1717            calClone->setTime(cal.getTime(status),status);
1718            if (U_FAILURE(status)) {
1719                goto ExitParse;
1720            }
1721            calClone->setTimeZone(cal.getTimeZone());
1722            workCal = calClone;
1723        } else {
1724            status = U_MEMORY_ALLOCATION_ERROR;
1725            goto ExitParse;
1726        }
1727    }
1728
1729    if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
1730        numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, status);
1731        if (numericLeapMonthFormatter == NULL) {
1732             status = U_MEMORY_ALLOCATION_ERROR;
1733             goto ExitParse;
1734        } else if (U_FAILURE(status)) {
1735             goto ExitParse; // this will delete numericLeapMonthFormatter
1736        }
1737    }
1738
1739    for (int32_t i=0; i<fPattern.length(); ++i) {
1740        UChar ch = fPattern.charAt(i);
1741
1742        // Handle alphabetic field characters.
1743        if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z]
1744            int32_t fieldPat = i;
1745
1746            // Count the length of this field specifier
1747            count = 1;
1748            while ((i+1)<fPattern.length() &&
1749                   fPattern.charAt(i+1) == ch) {
1750                ++count;
1751                ++i;
1752            }
1753
1754            if (isNumeric(ch, count)) {
1755                if (abutPat < 0) {
1756                    // Determine if there is an abutting numeric field.
1757                    // Record the start of a set of abutting numeric fields.
1758                    if (isAtNumericField(fPattern, i + 1)) {
1759                        abutPat = fieldPat;
1760                        abutStart = pos;
1761                        abutPass = 0;
1762                    }
1763                }
1764            } else {
1765                abutPat = -1; // End of any abutting fields
1766            }
1767
1768            // Handle fields within a run of abutting numeric fields.  Take
1769            // the pattern "HHmmss" as an example. We will try to parse
1770            // 2/2/2 characters of the input text, then if that fails,
1771            // 1/2/2.  We only adjust the width of the leftmost field; the
1772            // others remain fixed.  This allows "123456" => 12:34:56, but
1773            // "12345" => 1:23:45.  Likewise, for the pattern "yyyyMMdd" we
1774            // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
1775            if (abutPat >= 0) {
1776                // If we are at the start of a run of abutting fields, then
1777                // shorten this field in each pass.  If we can't shorten
1778                // this field any more, then the parse of this set of
1779                // abutting numeric fields has failed.
1780                if (fieldPat == abutPat) {
1781                    count -= abutPass++;
1782                    if (count == 0) {
1783                        status = U_PARSE_ERROR;
1784                        goto ExitParse;
1785                    }
1786                }
1787
1788                pos = subParse(text, pos, ch, count,
1789                               TRUE, FALSE, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter);
1790
1791                // If the parse fails anywhere in the run, back up to the
1792                // start of the run and retry.
1793                if (pos < 0) {
1794                    i = abutPat - 1;
1795                    pos = abutStart;
1796                    continue;
1797                }
1798            }
1799
1800            // Handle non-numeric fields and non-abutting numeric
1801            // fields.
1802            else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
1803                int32_t s = subParse(text, pos, ch, count,
1804                               FALSE, TRUE, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter);
1805
1806                if (s == -pos-1) {
1807                    // era not present, in special cases allow this to continue
1808                    // from the position where the era was expected
1809                    s = pos;
1810
1811                    if (i+1 < fPattern.length()) {
1812                        // move to next pattern character
1813                        UChar ch = fPattern.charAt(i+1);
1814
1815                        // check for whitespace
1816                        if (PatternProps::isWhiteSpace(ch)) {
1817                            i++;
1818                            // Advance over run in pattern
1819                            while ((i+1)<fPattern.length() &&
1820                                   PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
1821                                ++i;
1822                            }
1823                        }
1824                    }
1825                }
1826                else if (s <= 0) {
1827                    status = U_PARSE_ERROR;
1828                    goto ExitParse;
1829                }
1830                pos = s;
1831            }
1832        }
1833
1834        // Handle literal pattern characters.  These are any
1835        // quoted characters and non-alphabetic unquoted
1836        // characters.
1837        else {
1838
1839            abutPat = -1; // End of any abutting fields
1840
1841            if (! matchLiterals(fPattern, i, text, pos, lenient)) {
1842                status = U_PARSE_ERROR;
1843                goto ExitParse;
1844            }
1845        }
1846    }
1847
1848    // Special hack for trailing "." after non-numeric field.
1849    if (text.charAt(pos) == 0x2e && lenient) {
1850        // only do if the last field is not numeric
1851        if (isAfterNonNumericField(fPattern, fPattern.length())) {
1852            pos++; // skip the extra "."
1853        }
1854    }
1855
1856    // At this point the fields of Calendar have been set.  Calendar
1857    // will fill in default values for missing fields when the time
1858    // is computed.
1859
1860    parsePos.setIndex(pos);
1861
1862    // This part is a problem:  When we call parsedDate.after, we compute the time.
1863    // Take the date April 3 2004 at 2:30 am.  When this is first set up, the year
1864    // will be wrong if we're parsing a 2-digit year pattern.  It will be 1904.
1865    // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day.  2:30 am
1866    // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
1867    // on that day.  It is therefore parsed out to fields as 3:30 am.  Then we
1868    // add 100 years, and get April 3 2004 at 3:30 am.  Note that April 3 2004 is
1869    // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
1870    /*
1871        UDate parsedDate = calendar.getTime();
1872        if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
1873            calendar.add(Calendar.YEAR, 100);
1874            parsedDate = calendar.getTime();
1875        }
1876    */
1877    // Because of the above condition, save off the fields in case we need to readjust.
1878    // The procedure we use here is not particularly efficient, but there is no other
1879    // way to do this given the API restrictions present in Calendar.  We minimize
1880    // inefficiency by only performing this computation when it might apply, that is,
1881    // when the two-digit year is equal to the start year, and thus might fall at the
1882    // front or the back of the default century.  This only works because we adjust
1883    // the year correctly to start with in other cases -- see subParse().
1884    if (ambiguousYear[0] || tztype != UTZFMT_TIME_TYPE_UNKNOWN) // If this is true then the two-digit year == the default start year
1885    {
1886        // We need a copy of the fields, and we need to avoid triggering a call to
1887        // complete(), which will recalculate the fields.  Since we can't access
1888        // the fields[] array in Calendar, we clone the entire object.  This will
1889        // stop working if Calendar.clone() is ever rewritten to call complete().
1890        Calendar *copy;
1891        if (ambiguousYear[0]) {
1892            copy = cal.clone();
1893            // Check for failed cloning.
1894            if (copy == NULL) {
1895                status = U_MEMORY_ALLOCATION_ERROR;
1896                goto ExitParse;
1897            }
1898            UDate parsedDate = copy->getTime(status);
1899            // {sfb} check internalGetDefaultCenturyStart
1900            if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
1901                // We can't use add here because that does a complete() first.
1902                cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
1903            }
1904            delete copy;
1905        }
1906
1907        if (tztype != UTZFMT_TIME_TYPE_UNKNOWN) {
1908            copy = cal.clone();
1909            // Check for failed cloning.
1910            if (copy == NULL) {
1911                status = U_MEMORY_ALLOCATION_ERROR;
1912                goto ExitParse;
1913            }
1914            const TimeZone & tz = cal.getTimeZone();
1915            BasicTimeZone *btz = NULL;
1916
1917            if (dynamic_cast<const OlsonTimeZone *>(&tz) != NULL
1918                || dynamic_cast<const SimpleTimeZone *>(&tz) != NULL
1919                || dynamic_cast<const RuleBasedTimeZone *>(&tz) != NULL
1920                || dynamic_cast<const VTimeZone *>(&tz) != NULL) {
1921                btz = (BasicTimeZone*)&tz;
1922            }
1923
1924            // Get local millis
1925            copy->set(UCAL_ZONE_OFFSET, 0);
1926            copy->set(UCAL_DST_OFFSET, 0);
1927            UDate localMillis = copy->getTime(status);
1928
1929            // Make sure parsed time zone type (Standard or Daylight)
1930            // matches the rule used by the parsed time zone.
1931            int32_t raw, dst;
1932            if (btz != NULL) {
1933                if (tztype == UTZFMT_TIME_TYPE_STANDARD) {
1934                    btz->getOffsetFromLocal(localMillis,
1935                        BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status);
1936                } else {
1937                    btz->getOffsetFromLocal(localMillis,
1938                        BasicTimeZone::kDaylight, BasicTimeZone::kDaylight, raw, dst, status);
1939                }
1940            } else {
1941                // No good way to resolve ambiguous time at transition,
1942                // but following code work in most case.
1943                tz.getOffset(localMillis, TRUE, raw, dst, status);
1944            }
1945
1946            // Now, compare the results with parsed type, either standard or daylight saving time
1947            int32_t resolvedSavings = dst;
1948            if (tztype == UTZFMT_TIME_TYPE_STANDARD) {
1949                if (dst != 0) {
1950                    // Override DST_OFFSET = 0 in the result calendar
1951                    resolvedSavings = 0;
1952                }
1953            } else { // tztype == TZTYPE_DST
1954                if (dst == 0) {
1955                    if (btz != NULL) {
1956                        UDate time = localMillis + raw;
1957                        // We use the nearest daylight saving time rule.
1958                        TimeZoneTransition beforeTrs, afterTrs;
1959                        UDate beforeT = time, afterT = time;
1960                        int32_t beforeSav = 0, afterSav = 0;
1961                        UBool beforeTrsAvail, afterTrsAvail;
1962
1963                        // Search for DST rule before or on the time
1964                        while (TRUE) {
1965                            beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs);
1966                            if (!beforeTrsAvail) {
1967                                break;
1968                            }
1969                            beforeT = beforeTrs.getTime() - 1;
1970                            beforeSav = beforeTrs.getFrom()->getDSTSavings();
1971                            if (beforeSav != 0) {
1972                                break;
1973                            }
1974                        }
1975
1976                        // Search for DST rule after the time
1977                        while (TRUE) {
1978                            afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs);
1979                            if (!afterTrsAvail) {
1980                                break;
1981                            }
1982                            afterT = afterTrs.getTime();
1983                            afterSav = afterTrs.getTo()->getDSTSavings();
1984                            if (afterSav != 0) {
1985                                break;
1986                            }
1987                        }
1988
1989                        if (beforeTrsAvail && afterTrsAvail) {
1990                            if (time - beforeT > afterT - time) {
1991                                resolvedSavings = afterSav;
1992                            } else {
1993                                resolvedSavings = beforeSav;
1994                            }
1995                        } else if (beforeTrsAvail && beforeSav != 0) {
1996                            resolvedSavings = beforeSav;
1997                        } else if (afterTrsAvail && afterSav != 0) {
1998                            resolvedSavings = afterSav;
1999                        } else {
2000                            resolvedSavings = btz->getDSTSavings();
2001                        }
2002                    } else {
2003                        resolvedSavings = tz.getDSTSavings();
2004                    }
2005                    if (resolvedSavings == 0) {
2006                        // final fallback
2007                        resolvedSavings = U_MILLIS_PER_HOUR;
2008                    }
2009                }
2010            }
2011            cal.set(UCAL_ZONE_OFFSET, raw);
2012            cal.set(UCAL_DST_OFFSET, resolvedSavings);
2013            delete copy;
2014        }
2015    }
2016ExitParse:
2017    // Set the parsed result if local calendar is used
2018    // instead of the input calendar
2019    if (U_SUCCESS(status) && workCal != &cal) {
2020        cal.setTimeZone(workCal->getTimeZone());
2021        cal.setTime(workCal->getTime(status), status);
2022    }
2023
2024    if (numericLeapMonthFormatter != NULL) {
2025        delete numericLeapMonthFormatter;
2026    }
2027    if (calClone != NULL) {
2028        delete calClone;
2029    }
2030
2031    // If any Calendar calls failed, we pretend that we
2032    // couldn't parse the string, when in reality this isn't quite accurate--
2033    // we did parse it; the Calendar calls just failed.
2034    if (U_FAILURE(status)) {
2035        parsePos.setErrorIndex(pos);
2036        parsePos.setIndex(start);
2037    }
2038}
2039
2040UDate
2041SimpleDateFormat::parse( const UnicodeString& text,
2042                         ParsePosition& pos) const {
2043    // redefined here because the other parse() function hides this function's
2044    // cunterpart on DateFormat
2045    return DateFormat::parse(text, pos);
2046}
2047
2048UDate
2049SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const
2050{
2051    // redefined here because the other parse() function hides this function's
2052    // counterpart on DateFormat
2053    return DateFormat::parse(text, status);
2054}
2055//----------------------------------------------------------------------
2056
2057static UBool
2058newBestMatchWithOptionalDot(const UnicodeString &lcaseText,
2059                            const UnicodeString &data,
2060                            UnicodeString &bestMatchName,
2061                            int32_t &bestMatchLength);
2062
2063int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
2064                              int32_t start,
2065                              UCalendarDateFields field,
2066                              const UnicodeString* data,
2067                              int32_t dataCount,
2068                              Calendar& cal) const
2069{
2070    int32_t i = 0;
2071    int32_t count = dataCount;
2072
2073    // There may be multiple strings in the data[] array which begin with
2074    // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2075    // We keep track of the longest match, and return that.  Note that this
2076    // unfortunately requires us to test all array elements.
2077    int32_t bestMatchLength = 0, bestMatch = -1;
2078    UnicodeString bestMatchName;
2079
2080    // {sfb} kludge to support case-insensitive comparison
2081    // {markus 2002oct11} do not just use caseCompareBetween because we do not know
2082    // the length of the match after case folding
2083    // {alan 20040607} don't case change the whole string, since the length
2084    // can change
2085    // TODO we need a case-insensitive startsWith function
2086    UnicodeString lcaseText;
2087    text.extract(start, INT32_MAX, lcaseText);
2088    lcaseText.foldCase();
2089
2090    for (; i < count; ++i)
2091    {
2092        // Always compare if we have no match yet; otherwise only compare
2093        // against potentially better matches (longer strings).
2094
2095        if (newBestMatchWithOptionalDot(lcaseText, data[i], bestMatchName, bestMatchLength)) {
2096            bestMatch = i;
2097        }
2098    }
2099    if (bestMatch >= 0)
2100    {
2101        cal.set(field, bestMatch * 3);
2102
2103        // Once we have a match, we have to determine the length of the
2104        // original source string.  This will usually be == the length of
2105        // the case folded string, but it may differ (e.g. sharp s).
2106
2107        // Most of the time, the length will be the same as the length
2108        // of the string from the locale data.  Sometimes it will be
2109        // different, in which case we will have to figure it out by
2110        // adding a character at a time, until we have a match.  We do
2111        // this all in one loop, where we try 'len' first (at index
2112        // i==0).
2113        int32_t len = bestMatchName.length(); // 99+% of the time
2114        int32_t n = text.length() - start;
2115        for (i=0; i<=n; ++i) {
2116            int32_t j=i;
2117            if (i == 0) {
2118                j = len;
2119            } else if (i == len) {
2120                continue; // already tried this when i was 0
2121            }
2122            text.extract(start, j, lcaseText);
2123            lcaseText.foldCase();
2124            if (bestMatchName == lcaseText) {
2125                return start + j;
2126            }
2127        }
2128    }
2129
2130    return -start;
2131}
2132
2133//----------------------------------------------------------------------
2134UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
2135                                      int32_t &patternOffset,
2136                                      const UnicodeString &text,
2137                                      int32_t &textOffset,
2138                                      UBool lenient)
2139{
2140    UBool inQuote = FALSE;
2141    UnicodeString literal;
2142    int32_t i = patternOffset;
2143
2144    // scan pattern looking for contiguous literal characters
2145    for ( ; i < pattern.length(); i += 1) {
2146        UChar ch = pattern.charAt(i);
2147
2148        if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z]
2149            break;
2150        }
2151
2152        if (ch == QUOTE) {
2153            // Match a quote literal ('') inside OR outside of quotes
2154            if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
2155                i += 1;
2156            } else {
2157                inQuote = !inQuote;
2158                continue;
2159            }
2160        }
2161
2162        literal += ch;
2163    }
2164
2165    // at this point, literal contains the literal text
2166    // and i is the index of the next non-literal pattern character.
2167    int32_t p;
2168    int32_t t = textOffset;
2169
2170    if (lenient) {
2171        // trim leading, trailing whitespace from
2172        // the literal text
2173        literal.trim();
2174
2175        // ignore any leading whitespace in the text
2176        while (t < text.length() && u_isWhitespace(text.charAt(t))) {
2177            t += 1;
2178        }
2179    }
2180
2181    for (p = 0; p < literal.length() && t < text.length();) {
2182        UBool needWhitespace = FALSE;
2183
2184        while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) {
2185            needWhitespace = TRUE;
2186            p += 1;
2187        }
2188
2189        if (needWhitespace) {
2190            int32_t tStart = t;
2191
2192            while (t < text.length()) {
2193                UChar tch = text.charAt(t);
2194
2195                if (!u_isUWhiteSpace(tch) && !PatternProps::isWhiteSpace(tch)) {
2196                    break;
2197                }
2198
2199                t += 1;
2200            }
2201
2202            // TODO: should we require internal spaces
2203            // in lenient mode? (There won't be any
2204            // leading or trailing spaces)
2205            if (!lenient && t == tStart) {
2206                // didn't find matching whitespace:
2207                // an error in strict mode
2208                return FALSE;
2209            }
2210
2211            // In strict mode, this run of whitespace
2212            // may have been at the end.
2213            if (p >= literal.length()) {
2214                break;
2215            }
2216        }
2217
2218        if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
2219            // Ran out of text, or found a non-matching character:
2220            // OK in lenient mode, an error in strict mode.
2221            if (lenient) {
2222                if (t == textOffset && text.charAt(t) == 0x2e &&
2223                        isAfterNonNumericField(pattern, patternOffset)) {
2224                    // Lenient mode and the literal input text begins with a "." and
2225                    // we are after a non-numeric field: We skip the "."
2226                    ++t;
2227                    continue;  // Do not update p.
2228                }
2229                break;
2230            }
2231
2232            return FALSE;
2233        }
2234        ++p;
2235        ++t;
2236    }
2237
2238    // At this point if we're in strict mode we have a complete match.
2239    // If we're in lenient mode we may have a partial match, or no
2240    // match at all.
2241    if (p <= 0) {
2242        // no match. Pretend it matched a run of whitespace
2243        // and ignorables in the text.
2244        const  UnicodeSet *ignorables = NULL;
2245        UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i));
2246        if (patternCharIndex != UDAT_FIELD_COUNT) {
2247            ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
2248        }
2249
2250        for (t = textOffset; t < text.length(); t += 1) {
2251            UChar ch = text.charAt(t);
2252
2253            if (ignorables == NULL || !ignorables->contains(ch)) {
2254                break;
2255            }
2256        }
2257    }
2258
2259    // if we get here, we've got a complete match.
2260    patternOffset = i - 1;
2261    textOffset = t;
2262
2263    return TRUE;
2264}
2265
2266//----------------------------------------------------------------------
2267
2268int32_t SimpleDateFormat::matchString(const UnicodeString& text,
2269                              int32_t start,
2270                              UCalendarDateFields field,
2271                              const UnicodeString* data,
2272                              int32_t dataCount,
2273                              const UnicodeString* monthPattern,
2274                              Calendar& cal) const
2275{
2276    int32_t i = 0;
2277    int32_t count = dataCount;
2278
2279    if (field == UCAL_DAY_OF_WEEK) i = 1;
2280
2281    // There may be multiple strings in the data[] array which begin with
2282    // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2283    // We keep track of the longest match, and return that.  Note that this
2284    // unfortunately requires us to test all array elements.
2285    int32_t bestMatchLength = 0, bestMatch = -1;
2286    UnicodeString bestMatchName;
2287    int32_t isLeapMonth = 0;
2288
2289    // {sfb} kludge to support case-insensitive comparison
2290    // {markus 2002oct11} do not just use caseCompareBetween because we do not know
2291    // the length of the match after case folding
2292    // {alan 20040607} don't case change the whole string, since the length
2293    // can change
2294    // TODO we need a case-insensitive startsWith function
2295    UnicodeString lcaseText;
2296    text.extract(start, INT32_MAX, lcaseText);
2297    lcaseText.foldCase();
2298
2299    for (; i < count; ++i)
2300    {
2301        // Always compare if we have no match yet; otherwise only compare
2302        // against potentially better matches (longer strings).
2303
2304        if (newBestMatchWithOptionalDot(lcaseText, data[i], bestMatchName, bestMatchLength)) {
2305            bestMatch = i;
2306            isLeapMonth = 0;
2307        }
2308
2309        if (monthPattern != NULL) {
2310            UErrorCode status = U_ZERO_ERROR;
2311            UnicodeString leapMonthName;
2312            Formattable monthName((const UnicodeString&)(data[i]));
2313            MessageFormat::format(*monthPattern, &monthName, 1, leapMonthName, status);
2314            if (U_SUCCESS(status)) {
2315                if (newBestMatchWithOptionalDot(lcaseText, leapMonthName, bestMatchName, bestMatchLength)) {
2316                    bestMatch = i;
2317                    isLeapMonth = 1;
2318                }
2319            }
2320        }
2321    }
2322    if (bestMatch >= 0)
2323    {
2324        // Adjustment for Hebrew Calendar month Adar II
2325        if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) {
2326            cal.set(field,6);
2327        }
2328        else {
2329            if (field == UCAL_YEAR) {
2330                bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60
2331            }
2332            cal.set(field, bestMatch);
2333        }
2334        if (monthPattern != NULL) {
2335            cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth);
2336        }
2337
2338        // Once we have a match, we have to determine the length of the
2339        // original source string.  This will usually be == the length of
2340        // the case folded string, but it may differ (e.g. sharp s).
2341
2342        // Most of the time, the length will be the same as the length
2343        // of the string from the locale data.  Sometimes it will be
2344        // different, in which case we will have to figure it out by
2345        // adding a character at a time, until we have a match.  We do
2346        // this all in one loop, where we try 'len' first (at index
2347        // i==0).
2348        int32_t len = bestMatchName.length(); // 99+% of the time
2349        int32_t n = text.length() - start;
2350        for (i=0; i<=n; ++i) {
2351            int32_t j=i;
2352            if (i == 0) {
2353                j = len;
2354            } else if (i == len) {
2355                continue; // already tried this when i was 0
2356            }
2357            text.extract(start, j, lcaseText);
2358            lcaseText.foldCase();
2359            if (bestMatchName == lcaseText) {
2360                return start + j;
2361            }
2362        }
2363    }
2364
2365    return -start;
2366}
2367
2368static UBool
2369newBestMatchWithOptionalDot(const UnicodeString &lcaseText,
2370                            const UnicodeString &data,
2371                            UnicodeString &bestMatchName,
2372                            int32_t &bestMatchLength) {
2373    UnicodeString lcase;
2374    lcase.fastCopyFrom(data).foldCase();
2375    int32_t length = lcase.length();
2376    if (length <= bestMatchLength) {
2377        // data cannot provide a better match.
2378        return FALSE;
2379    }
2380
2381    if (lcaseText.compareBetween(0, length, lcase, 0, length) == 0) {
2382        // normal match
2383        bestMatchName = lcase;
2384        bestMatchLength = length;
2385        return TRUE;
2386    }
2387    if (lcase.charAt(--length) == 0x2e) {
2388        if (lcaseText.compareBetween(0, length, lcase, 0, length) == 0) {
2389            // The input text matches the data except for data's trailing dot.
2390            bestMatchName = lcase;
2391            bestMatchName.truncate(length);
2392            bestMatchLength = length;
2393            return TRUE;
2394        }
2395    }
2396    return FALSE;
2397}
2398
2399//----------------------------------------------------------------------
2400
2401void
2402SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
2403{
2404    parseAmbiguousDatesAsAfter(d, status);
2405}
2406
2407/**
2408 * Private member function that converts the parsed date strings into
2409 * timeFields. Returns -start (for ParsePosition) if failed.
2410 * @param text the time text to be parsed.
2411 * @param start where to start parsing.
2412 * @param ch the pattern character for the date field text to be parsed.
2413 * @param count the count of a pattern character.
2414 * @return the new start position if matching succeeded; a negative number
2415 * indicating matching failure, otherwise.
2416 */
2417int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
2418                           UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
2419                           int32_t patLoc, MessageFormat * numericLeapMonthFormatter) const
2420{
2421    Formattable number;
2422    int32_t value = 0;
2423    int32_t i;
2424    int32_t ps = 0;
2425    ParsePosition pos(0);
2426    UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
2427    NumberFormat *currentNumberFormat;
2428    UnicodeString temp;
2429    UBool lenient = isLenient();
2430    UBool gotNumber = FALSE;
2431
2432#if defined (U_DEBUG_CAL)
2433    //fprintf(stderr, "%s:%d - [%c]  st=%d \n", __FILE__, __LINE__, (char) ch, start);
2434#endif
2435
2436    if (patternCharIndex == UDAT_FIELD_COUNT) {
2437        return -start;
2438    }
2439
2440    currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
2441    UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
2442    UnicodeString hebr("hebr", 4, US_INV);
2443
2444    if (numericLeapMonthFormatter != NULL) {
2445        numericLeapMonthFormatter->setFormats((const Format **)&currentNumberFormat, 1);
2446    }
2447    UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0);
2448
2449    // If there are any spaces here, skip over them.  If we hit the end
2450    // of the string, then fail.
2451    for (;;) {
2452        if (start >= text.length()) {
2453            return -start;
2454        }
2455        UChar32 c = text.char32At(start);
2456        if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
2457            break;
2458        }
2459        start += U16_LENGTH(c);
2460    }
2461    pos.setIndex(start);
2462
2463    // We handle a few special cases here where we need to parse
2464    // a number value.  We handle further, more generic cases below.  We need
2465    // to handle some of them here because some fields require extra processing on
2466    // the parsed value.
2467    if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||                       // k
2468        patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||                       // H
2469        patternCharIndex == UDAT_HOUR1_FIELD ||                              // h
2470        patternCharIndex == UDAT_HOUR0_FIELD ||                              // K
2471        (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) ||          // e
2472        (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) ||     // c
2473        (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) ||              // M
2474        (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) ||   // L
2475        (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) ||            // Q
2476        (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q
2477        patternCharIndex == UDAT_YEAR_FIELD ||                               // y
2478        patternCharIndex == UDAT_YEAR_WOY_FIELD ||                           // Y
2479        patternCharIndex == UDAT_YEAR_NAME_FIELD ||                          // U (falls back to numeric)
2480        (patternCharIndex == UDAT_ERA_FIELD && isChineseCalendar) ||         // G
2481        patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)                    // S
2482    {
2483        int32_t parseStart = pos.getIndex();
2484        // It would be good to unify this with the obeyCount logic below,
2485        // but that's going to be difficult.
2486        const UnicodeString* src;
2487
2488        UBool parsedNumericLeapMonth = FALSE;
2489        if (numericLeapMonthFormatter != NULL && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) {
2490            int32_t argCount;
2491            Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount);
2492            if (args != NULL && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) {
2493                parsedNumericLeapMonth = TRUE;
2494                number.setLong(args[0].getLong());
2495                cal.set(UCAL_IS_LEAP_MONTH, 1);
2496                delete[] args;
2497            } else {
2498                pos.setIndex(parseStart);
2499                cal.set(UCAL_IS_LEAP_MONTH, 0);
2500            }
2501        }
2502
2503        if (!parsedNumericLeapMonth) {
2504            if (obeyCount) {
2505                if ((start+count) > text.length()) {
2506                    return -start;
2507                }
2508
2509                text.extractBetween(0, start + count, temp);
2510                src = &temp;
2511            } else {
2512                src = &text;
2513            }
2514
2515            parseInt(*src, number, pos, allowNegative,currentNumberFormat);
2516        }
2517
2518        int32_t txtLoc = pos.getIndex();
2519
2520        if (txtLoc > parseStart) {
2521            value = number.getLong();
2522            gotNumber = TRUE;
2523
2524            // suffix processing
2525            if (value < 0 ) {
2526                txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, TRUE);
2527                if (txtLoc != pos.getIndex()) {
2528                    value *= -1;
2529                }
2530            }
2531            else {
2532                txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, FALSE);
2533            }
2534
2535            if (!lenient) {
2536                // Check the range of the value
2537                int32_t bias = gFieldRangeBias[patternCharIndex];
2538                if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
2539                    return -start;
2540                }
2541            }
2542
2543            pos.setIndex(txtLoc);
2544        }
2545    }
2546
2547    // Make sure that we got a number if
2548    // we want one, and didn't get one
2549    // if we don't want one.
2550    switch (patternCharIndex) {
2551        case UDAT_HOUR_OF_DAY1_FIELD:
2552        case UDAT_HOUR_OF_DAY0_FIELD:
2553        case UDAT_HOUR1_FIELD:
2554        case UDAT_HOUR0_FIELD:
2555            // special range check for hours:
2556            if (value < 0 || value > 24) {
2557                return -start;
2558            }
2559
2560            // fall through to gotNumber check
2561
2562        case UDAT_YEAR_FIELD:
2563        case UDAT_YEAR_WOY_FIELD:
2564        case UDAT_FRACTIONAL_SECOND_FIELD:
2565            // these must be a number
2566            if (! gotNumber) {
2567                return -start;
2568            }
2569
2570            break;
2571
2572        default:
2573            // we check the rest of the fields below.
2574            break;
2575    }
2576
2577    switch (patternCharIndex) {
2578    case UDAT_ERA_FIELD:
2579        if (isChineseCalendar) {
2580            if (!gotNumber) {
2581                return -start;
2582            }
2583            cal.set(UCAL_ERA, value);
2584            return pos.getIndex();
2585        }
2586        if (count == 5) {
2587            ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, NULL, cal);
2588        } else if (count == 4) {
2589            ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, NULL, cal);
2590        } else {
2591            ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, NULL, cal);
2592        }
2593
2594        // check return position, if it equals -start, then matchString error
2595        // special case the return code so we don't necessarily fail out until we
2596        // verify no year information also
2597        if (ps == -start)
2598            ps--;
2599
2600        return ps;
2601
2602    case UDAT_YEAR_FIELD:
2603        // If there are 3 or more YEAR pattern characters, this indicates
2604        // that the year value is to be treated literally, without any
2605        // two-digit year adjustments (e.g., from "01" to 2001).  Otherwise
2606        // we made adjustments to place the 2-digit year in the proper
2607        // century, for parsed strings from "00" to "99".  Any other string
2608        // is treated literally:  "2250", "-1", "1", "002".
2609        if (fDateOverride.compare(hebr)==0 && value < 1000) {
2610            value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
2611        } else if ((pos.getIndex() - start) == 2 && !isChineseCalendar
2612            && u_isdigit(text.charAt(start))
2613            && u_isdigit(text.charAt(start+1)))
2614        {
2615        	// only adjust year for patterns less than 3.
2616        	if(count < 3) {
2617        		// Assume for example that the defaultCenturyStart is 6/18/1903.
2618        		// This means that two-digit years will be forced into the range
2619        		// 6/18/1903 to 6/17/2003.  As a result, years 00, 01, and 02
2620        		// correspond to 2000, 2001, and 2002.  Years 04, 05, etc. correspond
2621        		// to 1904, 1905, etc.  If the year is 03, then it is 2003 if the
2622        		// other fields specify a date before 6/18, or 1903 if they specify a
2623        		// date afterwards.  As a result, 03 is an ambiguous year.  All other
2624        		// two-digit years are unambiguous.
2625        		if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
2626        			int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2627        			ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2628        			value += (fDefaultCenturyStartYear/100)*100 +
2629        					(value < ambiguousTwoDigitYear ? 100 : 0);
2630        		}
2631            }
2632        }
2633        cal.set(UCAL_YEAR, value);
2634
2635        // Delayed checking for adjustment of Hebrew month numbers in non-leap years.
2636        if (saveHebrewMonth >= 0) {
2637            HebrewCalendar *hc = (HebrewCalendar*)&cal;
2638            if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) {
2639               cal.set(UCAL_MONTH,saveHebrewMonth);
2640            } else {
2641               cal.set(UCAL_MONTH,saveHebrewMonth-1);
2642            }
2643            saveHebrewMonth = -1;
2644        }
2645        return pos.getIndex();
2646
2647    case UDAT_YEAR_WOY_FIELD:
2648        // Comment is the same as for UDAT_Year_FIELDs - look above
2649        if (fDateOverride.compare(hebr)==0 && value < 1000) {
2650            value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
2651        } else if ((pos.getIndex() - start) == 2
2652            && u_isdigit(text.charAt(start))
2653            && u_isdigit(text.charAt(start+1))
2654            && fHaveDefaultCentury )
2655        {
2656            int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2657            ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2658            value += (fDefaultCenturyStartYear/100)*100 +
2659                (value < ambiguousTwoDigitYear ? 100 : 0);
2660        }
2661        cal.set(UCAL_YEAR_WOY, value);
2662        return pos.getIndex();
2663
2664    case UDAT_YEAR_NAME_FIELD:
2665        if (fSymbols->fShortYearNames != NULL) {
2666            int32_t newStart = matchString(text, start, UCAL_YEAR, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount, NULL, cal);
2667            if (newStart > 0) {
2668                return newStart;
2669            }
2670        }
2671        if (gotNumber && (lenient || value > fSymbols->fShortYearNamesCount)) {
2672            cal.set(UCAL_YEAR, value);
2673            return pos.getIndex();
2674        }
2675        return -start;
2676
2677    case UDAT_MONTH_FIELD:
2678    case UDAT_STANDALONE_MONTH_FIELD:
2679        if (gotNumber) // i.e., M or MM.
2680        {
2681            // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether
2682            // or not it was a leap year.  We may or may not yet know what year it is, so might have to delay checking until
2683            // the year is parsed.
2684            if (!strcmp(cal.getType(),"hebrew")) {
2685                HebrewCalendar *hc = (HebrewCalendar*)&cal;
2686                if (cal.isSet(UCAL_YEAR)) {
2687                   UErrorCode status = U_ZERO_ERROR;
2688                   if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) {
2689                       cal.set(UCAL_MONTH, value);
2690                   } else {
2691                       cal.set(UCAL_MONTH, value - 1);
2692                   }
2693                } else {
2694                    saveHebrewMonth = value;
2695                }
2696            } else {
2697                // Don't want to parse the month if it is a string
2698                // while pattern uses numeric style: M/MM, L/LL
2699                // [We computed 'value' above.]
2700                cal.set(UCAL_MONTH, value - 1);
2701            }
2702            return pos.getIndex();
2703        } else {
2704            // count >= 3 // i.e., MMM/MMMM, LLL/LLLL
2705            // Want to be able to parse both short and long forms.
2706            // Try count == 4 first:
2707            UnicodeString * wideMonthPat = NULL;
2708            UnicodeString * shortMonthPat = NULL;
2709            if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
2710                if (patternCharIndex==UDAT_MONTH_FIELD) {
2711                    wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide];
2712                    shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev];
2713                } else {
2714                    wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide];
2715                    shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev];
2716                }
2717            }
2718            int32_t newStart = 0;
2719            if (patternCharIndex==UDAT_MONTH_FIELD) {
2720                newStart = matchString(text, start, UCAL_MONTH, fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal); // try MMMM
2721                if (newStart > 0) {
2722                    return newStart;
2723                }
2724                newStart = matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal); // try MMM
2725            } else {
2726                newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal); // try LLLL
2727                if (newStart > 0) {
2728                    return newStart;
2729                }
2730                newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL
2731            }
2732            if (newStart > 0 || !lenient)  // currently we do not try to parse MMMMM/LLLLL: #8860
2733                return newStart;
2734            // else we allowing parsing as number, below
2735        }
2736        break;
2737
2738    case UDAT_HOUR_OF_DAY1_FIELD:
2739        // [We computed 'value' above.]
2740        if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
2741            value = 0;
2742
2743        // fall through to set field
2744
2745    case UDAT_HOUR_OF_DAY0_FIELD:
2746        cal.set(UCAL_HOUR_OF_DAY, value);
2747        return pos.getIndex();
2748
2749    case UDAT_FRACTIONAL_SECOND_FIELD:
2750        // Fractional seconds left-justify
2751        i = pos.getIndex() - start;
2752        if (i < 3) {
2753            while (i < 3) {
2754                value *= 10;
2755                i++;
2756            }
2757        } else {
2758            int32_t a = 1;
2759            while (i > 3) {
2760                a *= 10;
2761                i--;
2762            }
2763            value /= a;
2764        }
2765        cal.set(UCAL_MILLISECOND, value);
2766        return pos.getIndex();
2767
2768    case UDAT_DOW_LOCAL_FIELD:
2769        if (gotNumber) // i.e., e or ee
2770        {
2771            // [We computed 'value' above.]
2772            cal.set(UCAL_DOW_LOCAL, value);
2773            return pos.getIndex();
2774        }
2775        // else for eee-eeeee fall through to handling of EEE-EEEEE
2776        // fall through, do not break here
2777    case UDAT_DAY_OF_WEEK_FIELD:
2778        {
2779            // Want to be able to parse both short and long forms.
2780            // Try count == 4 (EEEE) wide first:
2781            int32_t newStart = 0;
2782            if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2783                                      fSymbols->fWeekdays, fSymbols->fWeekdaysCount, NULL, cal)) > 0)
2784                return newStart;
2785            // EEEE wide failed, now try EEE abbreviated
2786            else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2787                                   fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, NULL, cal)) > 0)
2788                return newStart;
2789            // EEE abbreviated failed, now try EEEEEE short
2790            else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2791                                   fSymbols->fShorterWeekdays, fSymbols->fShorterWeekdaysCount, NULL, cal)) > 0)
2792                return newStart;
2793            // EEEEEE short failed, now try EEEEE narrow
2794            else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2795                                   fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, NULL, cal)) > 0)
2796                return newStart;
2797            else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
2798                return newStart;
2799            // else we allowing parsing as number, below
2800        }
2801        break;
2802
2803    case UDAT_STANDALONE_DAY_FIELD:
2804        {
2805            if (gotNumber) // c or cc
2806            {
2807                // [We computed 'value' above.]
2808                cal.set(UCAL_DOW_LOCAL, value);
2809                return pos.getIndex();
2810            }
2811            // Want to be able to parse both short and long forms.
2812            // Try count == 4 (cccc) first:
2813            int32_t newStart = 0;
2814            if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2815                                      fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, NULL, cal)) > 0)
2816                return newStart;
2817            else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2818                                          fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, NULL, cal)) > 0)
2819                return newStart;
2820            else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2821                                          fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, NULL, cal)) > 0)
2822                return newStart;
2823            else if (!lenient)
2824                return newStart;
2825            // else we allowing parsing as number, below
2826        }
2827        break;
2828
2829    case UDAT_AM_PM_FIELD:
2830        return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, NULL, cal);
2831
2832    case UDAT_HOUR1_FIELD:
2833        // [We computed 'value' above.]
2834        if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
2835            value = 0;
2836
2837        // fall through to set field
2838
2839    case UDAT_HOUR0_FIELD:
2840        cal.set(UCAL_HOUR, value);
2841        return pos.getIndex();
2842
2843    case UDAT_QUARTER_FIELD:
2844        if (gotNumber) // i.e., Q or QQ.
2845        {
2846            // Don't want to parse the month if it is a string
2847            // while pattern uses numeric style: Q or QQ.
2848            // [We computed 'value' above.]
2849            cal.set(UCAL_MONTH, (value - 1) * 3);
2850            return pos.getIndex();
2851        } else {
2852            // count >= 3 // i.e., QQQ or QQQQ
2853            // Want to be able to parse both short and long forms.
2854            // Try count == 4 first:
2855            int32_t newStart = 0;
2856
2857            if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2858                                      fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
2859                return newStart;
2860            else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2861                                          fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
2862                return newStart;
2863            else if (!lenient)
2864                return newStart;
2865            // else we allowing parsing as number, below
2866        }
2867        break;
2868
2869    case UDAT_STANDALONE_QUARTER_FIELD:
2870        if (gotNumber) // i.e., q or qq.
2871        {
2872            // Don't want to parse the month if it is a string
2873            // while pattern uses numeric style: q or q.
2874            // [We computed 'value' above.]
2875            cal.set(UCAL_MONTH, (value - 1) * 3);
2876            return pos.getIndex();
2877        } else {
2878            // count >= 3 // i.e., qqq or qqqq
2879            // Want to be able to parse both short and long forms.
2880            // Try count == 4 first:
2881            int32_t newStart = 0;
2882
2883            if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2884                                      fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
2885                return newStart;
2886            else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2887                                          fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
2888                return newStart;
2889            else if (!lenient)
2890                return newStart;
2891            // else we allowing parsing as number, below
2892        }
2893        break;
2894
2895    case UDAT_TIMEZONE_FIELD: // 'z'
2896        {
2897            UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2898            UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_SPECIFIC_SHORT : UTZFMT_STYLE_SPECIFIC_LONG;
2899            TimeZone *tz  = tzFormat()->parse(style, text, pos, &tzTimeType);
2900            if (tz != NULL) {
2901                ((SimpleDateFormat*)this)->tztype = tzTimeType;
2902                cal.adoptTimeZone(tz);
2903                return pos.getIndex();
2904            }
2905        }
2906        break;
2907    case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
2908        {
2909            UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2910            UTimeZoneFormatStyle style = (count < 4) ?
2911                UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL : ((count == 5) ? UTZFMT_STYLE_ISO_EXTENDED_FULL: UTZFMT_STYLE_LOCALIZED_GMT);
2912            TimeZone *tz  = tzFormat()->parse(style, text, pos, &tzTimeType);
2913            if (tz != NULL) {
2914                ((SimpleDateFormat*)this)->tztype = tzTimeType;
2915                cal.adoptTimeZone(tz);
2916                return pos.getIndex();
2917            }
2918            return -start;
2919        }
2920    case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
2921        {
2922            UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2923            UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_GENERIC_SHORT : UTZFMT_STYLE_GENERIC_LONG;
2924            TimeZone *tz  = tzFormat()->parse(style, text, pos, &tzTimeType);
2925            if (tz != NULL) {
2926                ((SimpleDateFormat*)this)->tztype = tzTimeType;
2927                cal.adoptTimeZone(tz);
2928                return pos.getIndex();
2929            }
2930            return -start;
2931        }
2932    case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
2933        {
2934            UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2935            UTimeZoneFormatStyle style;
2936            switch (count) {
2937            case 1:
2938                style = UTZFMT_STYLE_ZONE_ID_SHORT;
2939                break;
2940            case 2:
2941                style = UTZFMT_STYLE_ZONE_ID;
2942                break;
2943            case 3:
2944                style = UTZFMT_STYLE_EXEMPLAR_LOCATION;
2945                break;
2946            default:
2947                style = UTZFMT_STYLE_GENERIC_LOCATION;
2948                break;
2949            }
2950            TimeZone *tz  = tzFormat()->parse(style, text, pos, &tzTimeType);
2951            if (tz != NULL) {
2952                ((SimpleDateFormat*)this)->tztype = tzTimeType;
2953                cal.adoptTimeZone(tz);
2954                return pos.getIndex();
2955            }
2956            return -start;
2957        }
2958    case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
2959        {
2960            UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2961            UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_LOCALIZED_GMT_SHORT : UTZFMT_STYLE_LOCALIZED_GMT;
2962            TimeZone *tz  = tzFormat()->parse(style, text, pos, &tzTimeType);
2963            if (tz != NULL) {
2964                ((SimpleDateFormat*)this)->tztype = tzTimeType;
2965                cal.adoptTimeZone(tz);
2966                return pos.getIndex();
2967            }
2968            return -start;
2969        }
2970    case UDAT_TIMEZONE_ISO_FIELD: // 'X'
2971        {
2972            UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2973            UTimeZoneFormatStyle style;
2974            switch (count) {
2975            case 1:
2976                style = UTZFMT_STYLE_ISO_BASIC_SHORT;
2977                break;
2978            case 2:
2979                style = UTZFMT_STYLE_ISO_BASIC_FIXED;
2980                break;
2981            case 3:
2982                style = UTZFMT_STYLE_ISO_EXTENDED_FIXED;
2983                break;
2984            case 4:
2985                style = UTZFMT_STYLE_ISO_BASIC_FULL;
2986                break;
2987            default:
2988                style = UTZFMT_STYLE_ISO_EXTENDED_FULL;
2989                break;
2990            }
2991            TimeZone *tz  = tzFormat()->parse(style, text, pos, &tzTimeType);
2992            if (tz != NULL) {
2993                ((SimpleDateFormat*)this)->tztype = tzTimeType;
2994                cal.adoptTimeZone(tz);
2995                return pos.getIndex();
2996            }
2997            return -start;
2998        }
2999    case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
3000        {
3001            UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
3002            UTimeZoneFormatStyle style;
3003            switch (count) {
3004            case 1:
3005                style = UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT;
3006                break;
3007            case 2:
3008                style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED;
3009                break;
3010            case 3:
3011                style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED;
3012                break;
3013            case 4:
3014                style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL;
3015                break;
3016            default:
3017                style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL;
3018                break;
3019            }
3020            TimeZone *tz  = tzFormat()->parse(style, text, pos, &tzTimeType);
3021            if (tz != NULL) {
3022                ((SimpleDateFormat*)this)->tztype = tzTimeType;
3023                cal.adoptTimeZone(tz);
3024                return pos.getIndex();
3025            }
3026            return -start;
3027        }
3028
3029    default:
3030        // Handle "generic" fields
3031        // this is now handled below, outside the switch block
3032        break;
3033    }
3034    // Handle "generic" fields:
3035    // switch default case now handled here (outside switch block) to allow
3036    // parsing of some string fields as digits for lenient case
3037
3038    int32_t parseStart = pos.getIndex();
3039    const UnicodeString* src;
3040    if (obeyCount) {
3041        if ((start+count) > text.length()) {
3042            return -start;
3043        }
3044        text.extractBetween(0, start + count, temp);
3045        src = &temp;
3046    } else {
3047        src = &text;
3048    }
3049    parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3050    if (pos.getIndex() != parseStart) {
3051        int32_t value = number.getLong();
3052
3053        // Don't need suffix processing here (as in number processing at the beginning of the function);
3054        // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
3055
3056        if (!lenient) {
3057            // Check the range of the value
3058            int32_t bias = gFieldRangeBias[patternCharIndex];
3059            if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
3060                return -start;
3061            }
3062        }
3063
3064        // For the following, need to repeat some of the "if (gotNumber)" code above:
3065        // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD,
3066        // UDAT_[STANDALONE_]QUARTER_FIELD
3067        switch (patternCharIndex) {
3068        case UDAT_MONTH_FIELD:
3069            // See notes under UDAT_MONTH_FIELD case above
3070            if (!strcmp(cal.getType(),"hebrew")) {
3071                HebrewCalendar *hc = (HebrewCalendar*)&cal;
3072                if (cal.isSet(UCAL_YEAR)) {
3073                   UErrorCode status = U_ZERO_ERROR;
3074                   if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) {
3075                       cal.set(UCAL_MONTH, value);
3076                   } else {
3077                       cal.set(UCAL_MONTH, value - 1);
3078                   }
3079                } else {
3080                    saveHebrewMonth = value;
3081                }
3082            } else {
3083                cal.set(UCAL_MONTH, value - 1);
3084            }
3085            break;
3086        case UDAT_STANDALONE_MONTH_FIELD:
3087            cal.set(UCAL_MONTH, value - 1);
3088            break;
3089        case UDAT_DOW_LOCAL_FIELD:
3090        case UDAT_STANDALONE_DAY_FIELD:
3091            cal.set(UCAL_DOW_LOCAL, value);
3092            break;
3093        case UDAT_QUARTER_FIELD:
3094        case UDAT_STANDALONE_QUARTER_FIELD:
3095             cal.set(UCAL_MONTH, (value - 1) * 3);
3096             break;
3097        default:
3098            cal.set(field, value);
3099            break;
3100        }
3101        return pos.getIndex();
3102    }
3103    return -start;
3104}
3105
3106/**
3107 * Parse an integer using fNumberFormat.  This method is semantically
3108 * const, but actually may modify fNumberFormat.
3109 */
3110void SimpleDateFormat::parseInt(const UnicodeString& text,
3111                                Formattable& number,
3112                                ParsePosition& pos,
3113                                UBool allowNegative,
3114                                NumberFormat *fmt) const {
3115    parseInt(text, number, -1, pos, allowNegative,fmt);
3116}
3117
3118/**
3119 * Parse an integer using fNumberFormat up to maxDigits.
3120 */
3121void SimpleDateFormat::parseInt(const UnicodeString& text,
3122                                Formattable& number,
3123                                int32_t maxDigits,
3124                                ParsePosition& pos,
3125                                UBool allowNegative,
3126                                NumberFormat *fmt) const {
3127    UnicodeString oldPrefix;
3128    DecimalFormat* df = NULL;
3129    if (!allowNegative && (df = dynamic_cast<DecimalFormat*>(fmt)) != NULL) {
3130        df->getNegativePrefix(oldPrefix);
3131        df->setNegativePrefix(UnicodeString(TRUE, SUPPRESS_NEGATIVE_PREFIX, -1));
3132    }
3133    int32_t oldPos = pos.getIndex();
3134    fmt->parse(text, number, pos);
3135    if (df != NULL) {
3136        df->setNegativePrefix(oldPrefix);
3137    }
3138
3139    if (maxDigits > 0) {
3140        // adjust the result to fit into
3141        // the maxDigits and move the position back
3142        int32_t nDigits = pos.getIndex() - oldPos;
3143        if (nDigits > maxDigits) {
3144            int32_t val = number.getLong();
3145            nDigits -= maxDigits;
3146            while (nDigits > 0) {
3147                val /= 10;
3148                nDigits--;
3149            }
3150            pos.setIndex(oldPos + maxDigits);
3151            number.setLong(val);
3152        }
3153    }
3154}
3155
3156//----------------------------------------------------------------------
3157
3158void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
3159                                        UnicodeString& translatedPattern,
3160                                        const UnicodeString& from,
3161                                        const UnicodeString& to,
3162                                        UErrorCode& status)
3163{
3164  // run through the pattern and convert any pattern symbols from the version
3165  // in "from" to the corresponding character ion "to".  This code takes
3166  // quoted strings into account (it doesn't try to translate them), and it signals
3167  // an error if a particular "pattern character" doesn't appear in "from".
3168  // Depending on the values of "from" and "to" this can convert from generic
3169  // to localized patterns or localized to generic.
3170  if (U_FAILURE(status))
3171    return;
3172
3173  translatedPattern.remove();
3174  UBool inQuote = FALSE;
3175  for (int32_t i = 0; i < originalPattern.length(); ++i) {
3176    UChar c = originalPattern[i];
3177    if (inQuote) {
3178      if (c == QUOTE)
3179    inQuote = FALSE;
3180    }
3181    else {
3182      if (c == QUOTE)
3183    inQuote = TRUE;
3184      else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/
3185           || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) {
3186    int32_t ci = from.indexOf(c);
3187    if (ci == -1) {
3188      status = U_INVALID_FORMAT_ERROR;
3189      return;
3190    }
3191    c = to[ci];
3192      }
3193    }
3194    translatedPattern += c;
3195  }
3196  if (inQuote) {
3197    status = U_INVALID_FORMAT_ERROR;
3198    return;
3199  }
3200}
3201
3202//----------------------------------------------------------------------
3203
3204UnicodeString&
3205SimpleDateFormat::toPattern(UnicodeString& result) const
3206{
3207    result = fPattern;
3208    return result;
3209}
3210
3211//----------------------------------------------------------------------
3212
3213UnicodeString&
3214SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
3215                                     UErrorCode& status) const
3216{
3217    translatePattern(fPattern, result,
3218                     UnicodeString(DateFormatSymbols::getPatternUChars()),
3219                     fSymbols->fLocalPatternChars, status);
3220    return result;
3221}
3222
3223//----------------------------------------------------------------------
3224
3225void
3226SimpleDateFormat::applyPattern(const UnicodeString& pattern)
3227{
3228    fPattern = pattern;
3229}
3230
3231//----------------------------------------------------------------------
3232
3233void
3234SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
3235                                        UErrorCode &status)
3236{
3237    translatePattern(pattern, fPattern,
3238                     fSymbols->fLocalPatternChars,
3239                     UnicodeString(DateFormatSymbols::getPatternUChars()), status);
3240}
3241
3242//----------------------------------------------------------------------
3243
3244const DateFormatSymbols*
3245SimpleDateFormat::getDateFormatSymbols() const
3246{
3247    return fSymbols;
3248}
3249
3250//----------------------------------------------------------------------
3251
3252void
3253SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
3254{
3255    delete fSymbols;
3256    fSymbols = newFormatSymbols;
3257}
3258
3259//----------------------------------------------------------------------
3260void
3261SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
3262{
3263    delete fSymbols;
3264    fSymbols = new DateFormatSymbols(newFormatSymbols);
3265}
3266
3267//----------------------------------------------------------------------
3268const TimeZoneFormat*
3269SimpleDateFormat::getTimeZoneFormat(void) const {
3270    return (const TimeZoneFormat*)tzFormat();
3271}
3272
3273//----------------------------------------------------------------------
3274void
3275SimpleDateFormat::adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt)
3276{
3277    delete fTimeZoneFormat;
3278    fTimeZoneFormat = timeZoneFormatToAdopt;
3279}
3280
3281//----------------------------------------------------------------------
3282void
3283SimpleDateFormat::setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat)
3284{
3285    delete fTimeZoneFormat;
3286    fTimeZoneFormat = new TimeZoneFormat(newTimeZoneFormat);
3287}
3288
3289//----------------------------------------------------------------------
3290
3291
3292void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
3293{
3294  UErrorCode status = U_ZERO_ERROR;
3295  DateFormat::adoptCalendar(calendarToAdopt);
3296  delete fSymbols;
3297  fSymbols=NULL;
3298  initializeSymbols(fLocale, fCalendar, status);  // we need new symbols
3299  initializeDefaultCentury();  // we need a new century (possibly)
3300}
3301
3302
3303//----------------------------------------------------------------------
3304
3305
3306void SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status)
3307{
3308    if (U_FAILURE(status))
3309        return;
3310    if ( (UDisplayContextType)((uint32_t)value >> 8) == UDISPCTX_TYPE_CAPITALIZATION ) {
3311        fCapitalizationContext = value;
3312    } else {
3313        status = U_ILLEGAL_ARGUMENT_ERROR;
3314   }
3315}
3316
3317
3318//----------------------------------------------------------------------
3319
3320
3321UDisplayContext SimpleDateFormat::getContext(UDisplayContextType type, UErrorCode& status) const
3322{
3323    if (U_FAILURE(status))
3324        return (UDisplayContext)0;
3325    if (type != UDISPCTX_TYPE_CAPITALIZATION) {
3326        status = U_ILLEGAL_ARGUMENT_ERROR;
3327        return (UDisplayContext)0;
3328    }
3329    return fCapitalizationContext;
3330}
3331
3332
3333//----------------------------------------------------------------------
3334
3335
3336UBool
3337SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
3338    return isFieldUnitIgnored(fPattern, field);
3339}
3340
3341
3342UBool
3343SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
3344                                     UCalendarDateFields field) {
3345    int32_t fieldLevel = fgCalendarFieldToLevel[field];
3346    int32_t level;
3347    UChar ch;
3348    UBool inQuote = FALSE;
3349    UChar prevCh = 0;
3350    int32_t count = 0;
3351
3352    for (int32_t i = 0; i < pattern.length(); ++i) {
3353        ch = pattern[i];
3354        if (ch != prevCh && count > 0) {
3355            level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
3356            // the larger the level, the smaller the field unit.
3357            if ( fieldLevel <= level ) {
3358                return FALSE;
3359            }
3360            count = 0;
3361        }
3362        if (ch == QUOTE) {
3363            if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
3364                ++i;
3365            } else {
3366                inQuote = ! inQuote;
3367            }
3368        }
3369        else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
3370                    || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
3371            prevCh = ch;
3372            ++count;
3373        }
3374    }
3375    if ( count > 0 ) {
3376        // last item
3377        level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
3378            if ( fieldLevel <= level ) {
3379                return FALSE;
3380            }
3381    }
3382    return TRUE;
3383}
3384
3385//----------------------------------------------------------------------
3386
3387const Locale&
3388SimpleDateFormat::getSmpFmtLocale(void) const {
3389    return fLocale;
3390}
3391
3392//----------------------------------------------------------------------
3393
3394int32_t
3395SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start,
3396                                 int32_t patLoc, UBool isNegative) const {
3397    // local variables
3398    UnicodeString suf;
3399    int32_t patternMatch;
3400    int32_t textPreMatch;
3401    int32_t textPostMatch;
3402
3403    // check that we are still in range
3404    if ( (start > text.length()) ||
3405         (start < 0) ||
3406         (patLoc < 0) ||
3407         (patLoc > fPattern.length())) {
3408        // out of range, don't advance location in text
3409        return start;
3410    }
3411
3412    // get the suffix
3413    DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
3414    if (decfmt != NULL) {
3415        if (isNegative) {
3416            suf = decfmt->getNegativeSuffix(suf);
3417        }
3418        else {
3419            suf = decfmt->getPositiveSuffix(suf);
3420        }
3421    }
3422
3423    // check for suffix
3424    if (suf.length() <= 0) {
3425        return start;
3426    }
3427
3428    // check suffix will be encountered in the pattern
3429    patternMatch = compareSimpleAffix(suf,fPattern,patLoc);
3430
3431    // check if a suffix will be encountered in the text
3432    textPreMatch = compareSimpleAffix(suf,text,start);
3433
3434    // check if a suffix was encountered in the text
3435    textPostMatch = compareSimpleAffix(suf,text,start-suf.length());
3436
3437    // check for suffix match
3438    if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) {
3439        return start;
3440    }
3441    else if ((textPostMatch >= 0) && (patternMatch >= 0) && (textPostMatch == patternMatch)) {
3442        return  start - suf.length();
3443    }
3444
3445    // should not get here
3446    return start;
3447}
3448
3449//----------------------------------------------------------------------
3450
3451int32_t
3452SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
3453                   const UnicodeString& input,
3454                   int32_t pos) const {
3455    int32_t start = pos;
3456    for (int32_t i=0; i<affix.length(); ) {
3457        UChar32 c = affix.char32At(i);
3458        int32_t len = U16_LENGTH(c);
3459        if (PatternProps::isWhiteSpace(c)) {
3460            // We may have a pattern like: \u200F \u0020
3461            //        and input text like: \u200F \u0020
3462            // Note that U+200F and U+0020 are Pattern_White_Space but only
3463            // U+0020 is UWhiteSpace.  So we have to first do a direct
3464            // match of the run of Pattern_White_Space in the pattern,
3465            // then match any extra characters.
3466            UBool literalMatch = FALSE;
3467            while (pos < input.length() &&
3468                   input.char32At(pos) == c) {
3469                literalMatch = TRUE;
3470                i += len;
3471                pos += len;
3472                if (i == affix.length()) {
3473                    break;
3474                }
3475                c = affix.char32At(i);
3476                len = U16_LENGTH(c);
3477                if (!PatternProps::isWhiteSpace(c)) {
3478                    break;
3479                }
3480            }
3481
3482            // Advance over run in pattern
3483            i = skipPatternWhiteSpace(affix, i);
3484
3485            // Advance over run in input text
3486            // Must see at least one white space char in input,
3487            // unless we've already matched some characters literally.
3488            int32_t s = pos;
3489            pos = skipUWhiteSpace(input, pos);
3490            if (pos == s && !literalMatch) {
3491                return -1;
3492            }
3493
3494            // If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
3495            // Otherwise, the previous lines may have skipped over text (such as U+00A0) that
3496            // is also in the affix.
3497            i = skipUWhiteSpace(affix, i);
3498        } else {
3499            if (pos < input.length() &&
3500                input.char32At(pos) == c) {
3501                i += len;
3502                pos += len;
3503            } else {
3504                return -1;
3505            }
3506        }
3507    }
3508    return pos - start;
3509}
3510
3511//----------------------------------------------------------------------
3512
3513int32_t
3514SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const {
3515    const UChar* s = text.getBuffer();
3516    return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s);
3517}
3518
3519//----------------------------------------------------------------------
3520
3521int32_t
3522SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const {
3523    while (pos < text.length()) {
3524        UChar32 c = text.char32At(pos);
3525        if (!u_isUWhiteSpace(c)) {
3526            break;
3527        }
3528        pos += U16_LENGTH(c);
3529    }
3530    return pos;
3531}
3532
3533//----------------------------------------------------------------------
3534
3535// Lazy TimeZoneFormat instantiation, semantically const.
3536TimeZoneFormat *
3537SimpleDateFormat::tzFormat() const {
3538    if (fTimeZoneFormat == NULL) {
3539        umtx_lock(&LOCK);
3540        {
3541            if (fTimeZoneFormat == NULL) {
3542                UErrorCode status = U_ZERO_ERROR;
3543                TimeZoneFormat *tzfmt = TimeZoneFormat::createInstance(fLocale, status);
3544                if (U_FAILURE(status)) {
3545                    return NULL;
3546                }
3547
3548                const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat = tzfmt;
3549            }
3550        }
3551        umtx_unlock(&LOCK);
3552    }
3553    return fTimeZoneFormat;
3554}
3555
3556U_NAMESPACE_END
3557
3558#endif /* #if !UCONFIG_NO_FORMATTING */
3559
3560//eof
3561