1/*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2005-2009, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 *   created on: 2005jun15
10 *   created by: Raymond Yang
11 */
12
13#if !UCONFIG_NO_IDNA
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include "unicode/utypes.h"
19#include "unicode/ucnv.h"
20#include "unicode/ustring.h"
21#include "unicode/uidna.h"
22
23#include "idnaconf.h"
24
25static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
26static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
27static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase
28static const UChar C_NAMEUTF8[] = {0x6E, 0x61, 0x6D, 0x65, 0x75, 0x74, 0x66, 0x38, 0}; // nameutf8
29
30static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type
31static const UChar C_TOASCII[]  =  {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0};       // toascii
32static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode
33
34static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail
35static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass
36static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail
37
38static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc
39static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44,
40       0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules
41
42IdnaConfTest::IdnaConfTest(){
43    base = NULL;
44    len = 0;
45    curOffset = 0;
46
47    type = option = passfail = -1;
48    namebase.setToBogus();
49    namezone.setToBogus();
50}
51IdnaConfTest::~IdnaConfTest(){
52    delete [] base;
53}
54
55#if !UCONFIG_NO_IDNA
56/* this function is modified from RBBITest::ReadAndConvertFile()
57 *
58 */
59UBool IdnaConfTest::ReadAndConvertFile(){
60
61    char * source = NULL;
62    size_t source_len;
63
64    // read the test data file to memory
65    FILE* f    = NULL;
66    UErrorCode  status  = U_ZERO_ERROR;
67
68    const char *path = IntlTest::getSourceTestData(status);
69    if (U_FAILURE(status)) {
70        errln("%s", u_errorName(status));
71        return FALSE;
72    }
73
74    const char* name = "idna_conf.txt";     // test data file
75    int t = strlen(path) + strlen(name) + 1;
76    char* absolute_name = new char[t];
77    strcpy(absolute_name, path);
78    strcat(absolute_name, name);
79    f = fopen(absolute_name, "rb");
80    delete [] absolute_name;
81
82    if (f == NULL){
83        dataerrln("fopen error on %s", name);
84        return FALSE;
85    }
86
87    fseek( f, 0, SEEK_END);
88    if ((source_len = ftell(f)) <= 0){
89        errln("Error reading test data file.");
90        fclose(f);
91        return FALSE;
92    }
93
94    source = new char[source_len];
95    fseek(f, 0, SEEK_SET);
96    if (fread(source, 1, source_len, f) != source_len) {
97        errln("Error reading test data file.");
98        delete [] source;
99        fclose(f);
100        return FALSE;
101    }
102    fclose(f);
103
104    // convert the UTF-8 encoded stream to UTF-16 stream
105    UConverter* conv = ucnv_open("utf-8", &status);
106    int dest_len = ucnv_toUChars(conv,
107                                NULL,           //  dest,
108                                0,              //  destCapacity,
109                                source,
110                                source_len,
111                                &status);
112    if (status == U_BUFFER_OVERFLOW_ERROR) {
113        // Buffer Overflow is expected from the preflight operation.
114        status = U_ZERO_ERROR;
115        UChar * dest = NULL;
116        dest = new UChar[ dest_len + 1];
117        ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status);
118        // Do not know the "if possible" behavior of ucnv_toUChars()
119        // Do it by ourself.
120        dest[dest_len] = 0;
121        len = dest_len;
122        base = dest;
123        delete [] source;
124        ucnv_close(conv);
125        return TRUE;    // The buffer will owned by caller.
126    }
127    errln("UConverter error: %s", u_errorName(status));
128    delete [] source;
129    ucnv_close(conv);
130    return FALSE;
131}
132
133int IdnaConfTest::isNewlineMark(){
134    static const UChar LF        = 0x0a;
135    static const UChar CR        = 0x0d;
136    UChar c = base[curOffset];
137    // CR LF
138    if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){
139        return 2;
140    }
141
142    // CR or LF
143    if ( c == CR || c == LF) {
144        return 1;
145    }
146
147    return 0;
148}
149
150/* Read a logical line.
151 *
152 * All lines ending in a backslash (\) and immediately followed by a newline
153 * character are joined with the next line in the source file forming logical
154 * lines from the physical lines.
155 *
156 */
157UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){
158    if ( !(curOffset < len) ) return FALSE; // stream end
159
160    static const UChar BACKSLASH = 0x5c;
161    buf.remove();
162    int t = 0;
163    while (curOffset < len){
164        if ((t = isNewlineMark())) {  // end of line
165            curOffset += t;
166            break;
167        }
168        UChar c = base[curOffset];
169        if (c == BACKSLASH && curOffset < len -1){  // escaped new line mark
170            if ((t = isNewlineMark())){
171                curOffset += 1 + t;  // BACKSLAH and NewlineMark
172                continue;
173            }
174        };
175        buf.append(c);
176        curOffset++;
177    }
178    return TRUE;
179}
180
181//
182//===============================================================
183//
184
185/* Explain <xxxxx> tag to a native value
186 *
187 * Since <xxxxx> is always larger than the native value,
188 * the operation will replace the tag directly in the buffer,
189 * and, of course, will shift tail elements.
190 */
191void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
192    buf.append((UChar)0);    // add a terminal NULL
193    UChar* bufBase = buf.getBuffer(buf.length());
194    UChar* p = bufBase;
195    while (*p != 0){
196        if ( *p != 0x3C){    // <
197            *bufBase++ = *p++;
198        } else {
199            p++;    // skip <
200            UChar32 cp = 0;
201            for ( ;*p != 0x3E; p++){   // >
202                if (0x30 <= *p && *p <= 0x39){        // 0-9
203                    cp = (cp * 16) + (*p - 0x30);
204                } else if (0x61 <= *p && *p <= 0x66){ // a-f
205                    cp = (cp * 16) + (*p - 0x61) + 10;
206                } else if (0x41 <= *p && *p <= 0x46) {// A-F
207                    cp = (cp * 16) + (*p - 0x41) + 10;
208                }
209                // no else. hope everything is good.
210            }
211            p++;    // skip >
212            if (U_IS_BMP(cp)){
213                *bufBase++ = cp;
214            } else {
215                *bufBase++ = U16_LEAD(cp);
216                *bufBase++ = U16_TRAIL(cp);
217            }
218        }
219    }
220    *bufBase = 0;  // close our buffer
221    buf.releaseBuffer();
222}
223
224void IdnaConfTest::Call(){
225    if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){
226        errln("Incomplete record");
227    } else {
228        UErrorCode status = U_ZERO_ERROR;
229        UChar result[200] = {0,};   // simple life
230        const UChar *p = namebase.getTerminatedBuffer();
231        const int p_len = namebase.length();
232
233        if (type == 0 && option == 0){
234            uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
235        } else if (type == 0 && option == 1){
236            uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
237        } else if (type == 1 && option == 0){
238            uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
239        } else if (type == 1 && option == 1){
240            uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
241        }
242        if (passfail == 0){
243            if (U_FAILURE(status)){
244                id.append(" should pass, but failed. - ");
245                id.append(u_errorName(status));
246                errcheckln(status, id);
247            } else{
248                if (namezone.compare(result, -1) == 0){
249                    // expected
250                    logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
251                } else {
252                    id.append(" no error, but result is not as expected.");
253                    errln(id);
254                }
255            }
256        } else if (passfail == 1){
257            if (U_FAILURE(status)){
258                // expected
259                // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName
260                //logln("Got the expected error: " + UnicodeString(u_errorName(status)));
261            } else{
262                if (namebase.compare(result, -1) == 0){
263                    // garbage in -> garbage out
264                    logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
265                } else {
266                    id.append(" should fail, but not failed. ");
267                    id.append(u_errorName(status));
268                    errln(id);
269                }
270            }
271        }
272    }
273    type = option = passfail = -1;
274    namebase.setToBogus();
275    namezone.setToBogus();
276    id.remove();
277    return;
278}
279
280void IdnaConfTest::Test(void){
281    if (!ReadAndConvertFile())return;
282
283    UnicodeString s;
284    UnicodeString key;
285    UnicodeString value;
286
287    // skip everything before the first "=====" and "=====" itself
288    do {
289        if (!ReadOneLine(s)) {
290            errln("End of file prematurely found");
291            break;
292        }
293    }
294    while (s.compare(C_TAG, -1) != 0);   //"====="
295
296    while(ReadOneLine(s)){
297        s.trim();
298        key.remove();
299        value.remove();
300        if (s.compare(C_TAG, -1) == 0){   //"====="
301            Call();
302       } else {
303            // explain      key:value
304            int p = s.indexOf((UChar)0x3A);    // :
305            key.setTo(s,0,p).trim();
306            value.setTo(s,p+1).trim();
307            if (key.compare(C_TYPE, -1) == 0){
308                if (value.compare(C_TOASCII, -1) == 0) {
309                    type = 0;
310                } else if (value.compare(C_TOUNICODE, -1) == 0){
311                    type = 1;
312                }
313            } else if (key.compare(C_PASSFAIL, -1) == 0){
314                if (value.compare(C_PASS, -1) == 0){
315                    passfail = 0;
316                } else if (value.compare(C_FAIL, -1) == 0){
317                    passfail = 1;
318                }
319            } else if (key.compare(C_DESC, -1) == 0){
320                if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
321                    option = 1; // not found
322                } else {
323                    option = 0;
324                }
325                id.setTo(value, 0, value.indexOf((UChar)0x20));    // space
326            } else if (key.compare(C_NAMEZONE, -1) == 0){
327                ExplainCodePointTag(value);
328                namezone.setTo(value);
329            } else if (key.compare(C_NAMEBASE, -1) == 0){
330                ExplainCodePointTag(value);
331                namebase.setTo(value);
332            }
333            // just skip other lines
334        }
335    }
336
337    Call(); // for last record
338}
339#else
340void IdnaConfTest::Test(void)
341{
342  // test nothing...
343}
344#endif
345
346void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){
347    switch (index) {
348        TESTCASE(0,Test);
349        default: name = ""; break;
350    }
351}
352
353#endif
354