1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2002-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9//
10//   dcfmtest.cpp
11//
12//     Decimal Formatter tests, data driven.
13//
14
15#include "intltest.h"
16
17#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_REGULAR_EXPRESSIONS
18
19#include "unicode/regex.h"
20#include "unicode/uchar.h"
21#include "unicode/ustring.h"
22#include "unicode/unistr.h"
23#include "unicode/dcfmtsym.h"
24#include "unicode/decimfmt.h"
25#include "unicode/locid.h"
26#include "cmemory.h"
27#include "dcfmtest.h"
28#include "util.h"
29#include "cstring.h"
30#include <stdlib.h>
31#include <string.h>
32#include <stdio.h>
33
34#if !defined(_MSC_VER)
35namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364
36#endif
37
38#include <string>
39#include <iostream>
40
41//---------------------------------------------------------------------------
42//
43//  Test class boilerplate
44//
45//---------------------------------------------------------------------------
46DecimalFormatTest::DecimalFormatTest()
47{
48}
49
50
51DecimalFormatTest::~DecimalFormatTest()
52{
53}
54
55
56
57void DecimalFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
58{
59    if (exec) logln("TestSuite DecimalFormatTest: ");
60    switch (index) {
61
62#if !UCONFIG_NO_FILE_IO
63        case 0: name = "DataDrivenTests";
64            if (exec) DataDrivenTests();
65            break;
66#else
67        case 0: name = "skip";
68            break;
69#endif
70
71        default: name = "";
72            break; //needed to end loop
73    }
74}
75
76
77//---------------------------------------------------------------------------
78//
79//   Error Checking / Reporting macros used in all of the tests.
80//
81//---------------------------------------------------------------------------
// Each macro expands to a brace-enclosed block. All but DF_ASSERT_FAIL (which declares
// its own) rely on a variable named 'status' being in scope at the point of use.

// On a failing 'status': report a data error and return 0 from the enclosing function.
#define DF_CHECK_STATUS { \
    if (U_FAILURE(status)) { \
        dataerrln("DecimalFormatTest failure at line %d.  status=%s", \
                  __LINE__, u_errorName(status)); \
        return 0; \
    } \
}

// Report an error when 'expr' compares equal to FALSE; execution continues afterwards.
#define DF_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        errln("DecimalFormatTest failure at line %d.\n", __LINE__); \
    } \
}

// Evaluate 'expr' with a fresh local 'status' in scope, then report a data error
// unless that status ended up equal to 'errcode'.
#define DF_ASSERT_FAIL(expr, errcode) { \
    UErrorCode status=U_ZERO_ERROR; \
    (expr); \
    if (status!=errcode) { \
        dataerrln("DecimalFormatTest failure at line %d.  Expected status=%s, got %s", \
                  __LINE__, u_errorName(errcode), u_errorName(status)); \
    } \
}

// Like DF_CHECK_STATUS, but also prints a caller-supplied line number and does not return.
#define DF_CHECK_STATUS_L(line) { \
    if (U_FAILURE(status)) { \
        errln("DecimalFormatTest failure at line %d, from %d.  status=%d\n", \
              __LINE__, (line), status); \
    } \
}

// Like DF_ASSERT, but also prints a caller-supplied line number and returns (void)
// from the enclosing function on failure.
#define DF_ASSERT_L(expr, line) { \
    if ((expr)==FALSE) { \
        errln("DecimalFormatTest failure at line %d, from %d.", __LINE__, (line)); \
        return; \
    } \
}
97
98
99
100//
101//  InvariantStringPiece
102//    Wrap a StringPiece around the extracted invariant data of a UnicodeString.
103//    The data is guaranteed to be nul terminated.  (This is not true of StringPiece
104//    in general, but is true of InvariantStringPiece)
105//
106class InvariantStringPiece: public StringPiece {
107  public:
108    InvariantStringPiece(const UnicodeString &s);
109    ~InvariantStringPiece() {};
110  private:
111    MaybeStackArray<char, 20>  buf;
112};
113
114InvariantStringPiece::InvariantStringPiece(const UnicodeString &s) {
115    int32_t  len = s.length();
116    if (len+1 > buf.getCapacity()) {
117        buf.resize(len+1);
118    }
119    // Buffer size is len+1 so that s.extract() will nul-terminate the string.
120    s.extract(0, len, buf.getAlias(), len+1, US_INV);
121    this->set(buf.getAlias(), len);
122}
123
124
125//  UnicodeStringPiece
126//    Wrap a StringPiece around the extracted (to the default charset) data of
127//    a UnicodeString.  The extracted data is guaranteed to be nul terminated.
128//    (This is not true of StringPiece in general, but is true of UnicodeStringPiece)
129//
130class UnicodeStringPiece: public StringPiece {
131  public:
132    UnicodeStringPiece(const UnicodeString &s);
133    ~UnicodeStringPiece() {};
134  private:
135    MaybeStackArray<char, 20>  buf;
136};
137
138UnicodeStringPiece::UnicodeStringPiece(const UnicodeString &s) {
139    int32_t  len = s.length();
140    int32_t  capacity = buf.getCapacity();
141    int32_t requiredCapacity = s.extract(0, len, buf.getAlias(), capacity) + 1;
142    if (capacity < requiredCapacity) {
143        buf.resize(requiredCapacity);
144        capacity = requiredCapacity;
145        s.extract(0, len, buf.getAlias(), capacity);
146    }
147    this->set(buf.getAlias(), requiredCapacity - 1);
148}
149
150
151
152//---------------------------------------------------------------------------
153//
154//      DataDrivenTests
//             The test cases are in a separate data file, dcfmtest.txt.
156//
157//---------------------------------------------------------------------------
158
159// Translate a Formattable::type enum value to a string, for error message formatting.
160static const char *formattableType(Formattable::Type typ) {
161    static const char *types[] = {"kDate",
162                                  "kDouble",
163                                  "kLong",
164                                  "kString",
165                                  "kArray",
166                                  "kInt64",
167                                  "kObject"
168                                  };
169    if (typ<0 || typ>Formattable::kObject) {
170        return "Unknown";
171    }
172    return types[typ];
173}
174
175const char *
176DecimalFormatTest::getPath(char *buffer, const char *filename) {
177    UErrorCode status=U_ZERO_ERROR;
178    const char *testDataDirectory = IntlTest::getSourceTestData(status);
179    DF_CHECK_STATUS;
180
181    strcpy(buffer, testDataDirectory);
182    strcat(buffer, filename);
183    return buffer;
184}
185
186void DecimalFormatTest::DataDrivenTests() {
187    char tdd[2048];
188    const char *srcPath;
189    UErrorCode  status  = U_ZERO_ERROR;
190    int32_t     lineNum = 0;
191
192    //
193    //  Open and read the test data file.
194    //
195    srcPath=getPath(tdd, "dcfmtest.txt");
196    if(srcPath==NULL) {
197        return; /* something went wrong, error already output */
198    }
199
200    int32_t    len;
201    UChar *testData = ReadAndConvertFile(srcPath, len, status);
202    if (U_FAILURE(status)) {
203        return; /* something went wrong, error already output */
204    }
205
206    //
207    //  Put the test data into a UnicodeString
208    //
209    UnicodeString testString(FALSE, testData, len);
210
211    RegexMatcher    parseLineMat(UnicodeString(
212            "(?i)\\s*parse\\s+"
213            "\"([^\"]*)\"\\s+"           // Capture group 1: input text
214            "([ild])\\s+"                // Capture group 2: expected parsed type
215            "\"([^\"]*)\"\\s+"           // Capture group 3: expected parsed decimal
216            "\\s*(?:#.*)?"),             // Trailing comment
217         0, status);
218
219    RegexMatcher    formatLineMat(UnicodeString(
220            "(?i)\\s*format\\s+"
221            "(\\S+)\\s+"                 // Capture group 1: pattern
222            "(ceiling|floor|down|up|halfeven|halfdown|halfup|default|unnecessary)\\s+"  // Capture group 2: Rounding Mode
223            "\"([^\"]*)\"\\s+"           // Capture group 3: input
224            "\"([^\"]*)\""               // Capture group 4: expected output
225            "\\s*(?:#.*)?"),             // Trailing comment
226         0, status);
227
228    RegexMatcher    commentMat    (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
229    RegexMatcher    lineMat(UNICODE_STRING_SIMPLE("(?m)^(.*?)$"), testString, 0, status);
230
231    if (U_FAILURE(status)){
232        dataerrln("Construct RegexMatcher() error.");
233        delete [] testData;
234        return;
235    }
236
237    //
238    //  Loop over the test data file, once per line.
239    //
240    while (lineMat.find()) {
241        lineNum++;
242        if (U_FAILURE(status)) {
243            dataerrln("File dcfmtest.txt, line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
244        }
245
246        status = U_ZERO_ERROR;
247        UnicodeString testLine = lineMat.group(1, status);
248        // printf("%s\n", UnicodeStringPiece(testLine).data());
249        if (testLine.length() == 0) {
250            continue;
251        }
252
253        //
254        // Parse the test line.  Skip blank and comment only lines.
255        // Separate out the three main fields - pattern, flags, target.
256        //
257
258        commentMat.reset(testLine);
259        if (commentMat.lookingAt(status)) {
260            // This line is a comment, or blank.
261            continue;
262        }
263
264
265        //
266        //  Handle "parse" test case line from file
267        //
268        parseLineMat.reset(testLine);
269        if (parseLineMat.lookingAt(status)) {
270            execParseTest(lineNum,
271                          parseLineMat.group(1, status),    // input
272                          parseLineMat.group(2, status),    // Expected Type
273                          parseLineMat.group(3, status),    // Expected Decimal String
274                          status
275                          );
276            continue;
277        }
278
279        //
280        //  Handle "format" test case line
281        //
282        formatLineMat.reset(testLine);
283        if (formatLineMat.lookingAt(status)) {
284            execFormatTest(lineNum,
285                           formatLineMat.group(1, status),    // Pattern
286                           formatLineMat.group(2, status),    // rounding mode
287                           formatLineMat.group(3, status),    // input decimal number
288                           formatLineMat.group(4, status),    // expected formatted result
289                           kFormattable,
290                           status);
291
292            execFormatTest(lineNum,
293                           formatLineMat.group(1, status),    // Pattern
294                           formatLineMat.group(2, status),    // rounding mode
295                           formatLineMat.group(3, status),    // input decimal number
296                           formatLineMat.group(4, status),    // expected formatted result
297                           kStringPiece,
298                           status);
299            continue;
300        }
301
302        //
303        //  Line is not a recognizable test case.
304        //
305        errln("Badly formed test case at line %d.\n%s\n",
306             lineNum, UnicodeStringPiece(testLine).data());
307
308    }
309
310    delete [] testData;
311}
312
313
314
315void DecimalFormatTest::execParseTest(int32_t lineNum,
316                                     const UnicodeString &inputText,
317                                     const UnicodeString &expectedType,
318                                     const UnicodeString &expectedDecimal,
319                                     UErrorCode &status) {
320
321    if (U_FAILURE(status)) {
322        return;
323    }
324
325    DecimalFormatSymbols symbols(Locale::getUS(), status);
326    UnicodeString pattern = UNICODE_STRING_SIMPLE("####");
327    DecimalFormat format(pattern, symbols, status);
328    Formattable   result;
329    if (U_FAILURE(status)) {
330        dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
331            lineNum, u_errorName(status));
332        return;
333    }
334
335    ParsePosition pos;
336    int32_t expectedParseEndPosition = inputText.length();
337
338    format.parse(inputText, result, pos);
339
340    if (expectedParseEndPosition != pos.getIndex()) {
341        errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d.  "
342              "Actual parse position: %d", expectedParseEndPosition, pos.getIndex());
343        return;
344    }
345
346    char   expectedTypeC[2];
347    expectedType.extract(0, 1, expectedTypeC, 2, US_INV);
348    Formattable::Type expectType = Formattable::kDate;
349    switch (expectedTypeC[0]) {
350      case 'd': expectType = Formattable::kDouble; break;
351      case 'i': expectType = Formattable::kLong;   break;
352      case 'l': expectType = Formattable::kInt64;  break;
353      default:
354          errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"",
355              lineNum, InvariantStringPiece(expectedType).data());
356          return;
357    }
358    if (result.getType() != expectType) {
359        errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)",
360             lineNum, formattableType(expectType), formattableType(result.getType()));
361        return;
362    }
363
364    StringPiece decimalResult = result.getDecimalNumber(status);
365    if (U_FAILURE(status)) {
366        errln("File %s, line %d: error %s.  Line in file dcfmtest.txt:  %d:",
367            __FILE__, __LINE__, u_errorName(status), lineNum);
368        return;
369    }
370
371    InvariantStringPiece expectedResults(expectedDecimal);
372    if (decimalResult != expectedResults) {
373        errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
374            lineNum, expectedResults.data(), decimalResult.data());
375    }
376
377    return;
378}
379
380
381void DecimalFormatTest::execFormatTest(int32_t lineNum,
382                           const UnicodeString &pattern,     // Pattern
383                           const UnicodeString &round,       // rounding mode
384                           const UnicodeString &input,       // input decimal number
385                           const UnicodeString &expected,    // expected formatted result
386                           EFormatInputType inType,          // input number type
387                           UErrorCode &status) {
388    if (U_FAILURE(status)) {
389        return;
390    }
391
392    DecimalFormatSymbols symbols(Locale::getUS(), status);
393    // printf("Pattern = %s\n", UnicodeStringPiece(pattern).data());
394    DecimalFormat fmtr(pattern, symbols, status);
395    if (U_FAILURE(status)) {
396        dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
397            lineNum, u_errorName(status));
398        return;
399    }
400    if (round=="ceiling") {
401        fmtr.setRoundingMode(DecimalFormat::kRoundCeiling);
402    } else if (round=="floor") {
403        fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
404    } else if (round=="down") {
405        fmtr.setRoundingMode(DecimalFormat::kRoundDown);
406    } else if (round=="up") {
407        fmtr.setRoundingMode(DecimalFormat::kRoundUp);
408    } else if (round=="halfeven") {
409        fmtr.setRoundingMode(DecimalFormat::kRoundHalfEven);
410    } else if (round=="halfdown") {
411        fmtr.setRoundingMode(DecimalFormat::kRoundHalfDown);
412    } else if (round=="halfup") {
413        fmtr.setRoundingMode(DecimalFormat::kRoundHalfUp);
414    } else if (round=="default") {
415        // don't set any value.
416    } else if (round=="unnecessary") {
417        fmtr.setRoundingMode(DecimalFormat::kRoundUnnecessary);
418    } else {
419        fmtr.setRoundingMode(DecimalFormat::kRoundFloor);
420        errln("file dcfmtest.txt, line %d: Bad rounding mode \"%s\"",
421                lineNum, UnicodeStringPiece(round).data());
422    }
423
424    const char *typeStr = "Unknown";
425    UnicodeString result;
426    UnicodeStringPiece spInput(input);
427
428    switch (inType) {
429    case kFormattable:
430        {
431            typeStr = "Formattable";
432            Formattable fmtbl;
433            fmtbl.setDecimalNumber(spInput, status);
434            fmtr.format(fmtbl, result, NULL, status);
435        }
436        break;
437    case kStringPiece:
438        typeStr = "StringPiece";
439        fmtr.format(spInput, result, NULL, status);
440        break;
441    }
442
443    if ((status == U_FORMAT_INEXACT_ERROR) && (result == "") && (expected == "Inexact")) {
444        // Test succeeded.
445        status = U_ZERO_ERROR;
446        return;
447    }
448
449    if (U_FAILURE(status)) {
450        errln("[%s] file dcfmtest.txt, line %d: format() returned %s.",
451            typeStr, lineNum, u_errorName(status));
452        status = U_ZERO_ERROR;
453        return;
454    }
455
456    if (result != expected) {
457        errln("[%s] file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
458            typeStr, lineNum, UnicodeStringPiece(expected).data(), UnicodeStringPiece(result).data());
459    }
460}
461
462
463//-------------------------------------------------------------------------------
464//
465//  Read a text data file, convert it from UTF-8 to UChars, and return the data
466//    in one big UChar * buffer, which the caller must delete.
467//
468//    (Lightly modified version of a similar function in regextst.cpp)
469//
470//--------------------------------------------------------------------------------
471UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
472                                     UErrorCode &status) {
473    UChar       *retPtr  = NULL;
474    char        *fileBuf = NULL;
475    const char  *fileBufNoBOM = NULL;
476    FILE        *f       = NULL;
477
478    ulen = 0;
479    if (U_FAILURE(status)) {
480        return retPtr;
481    }
482
483    //
484    //  Open the file.
485    //
486    f = fopen(fileName, "rb");
487    if (f == 0) {
488        dataerrln("Error opening test data file %s\n", fileName);
489        status = U_FILE_ACCESS_ERROR;
490        return NULL;
491    }
492    //
493    //  Read it in
494    //
495    int32_t            fileSize;
496    int32_t            amtRead;
497    int32_t            amtReadNoBOM;
498
499    fseek( f, 0, SEEK_END);
500    fileSize = ftell(f);
501    fileBuf = new char[fileSize];
502    fseek(f, 0, SEEK_SET);
503    amtRead = fread(fileBuf, 1, fileSize, f);
504    if (amtRead != fileSize || fileSize <= 0) {
505        errln("Error reading test data file.");
506        goto cleanUpAndReturn;
507    }
508
509    //
510    // Look for a UTF-8 BOM on the data just read.
511    //    The test data file is UTF-8.
512    //    The BOM needs to be there in the source file to keep the Windows &
513    //    EBCDIC machines happy, so force an error if it goes missing.
514    //    Many Linux editors will silently strip it.
515    //
516    fileBufNoBOM = fileBuf + 3;
517    amtReadNoBOM = amtRead - 3;
518    if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
519        // TODO:  restore this check.
520        errln("Test data file %s is missing its BOM", fileName);
521        fileBufNoBOM = fileBuf;
522        amtReadNoBOM = amtRead;
523    }
524
525    //
526    // Find the length of the input in UTF-16 UChars
527    //  (by preflighting the conversion)
528    //
529    u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);
530
531    //
532    // Convert file contents from UTF-8 to UTF-16
533    //
534    if (status == U_BUFFER_OVERFLOW_ERROR) {
535        // Buffer Overflow is expected from the preflight operation.
536        status = U_ZERO_ERROR;
537        retPtr = new UChar[ulen+1];
538        u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
539    }
540
541cleanUpAndReturn:
542    fclose(f);
543    delete[] fileBuf;
544    if (U_FAILURE(status)) {
545        errln("ICU Error \"%s\"\n", u_errorName(status));
546        delete retPtr;
547        retPtr = NULL;
548    };
549    return retPtr;
550}
551
552#endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
553
554