1/*
2*******************************************************************************
3*
4*   Copyright (C) 2003-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  testidn.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2003-02-06
14*   created by: Ram Viswanadha
15*
*   This test reads sprep/rfc3491.txt from the test data directory,
*   parses it, and checks every entry against the data of the
*   RFC 3491 Nameprep StringPrep profile that is loaded at run time.
*   It does not write any files.
20*/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
25
26#define USPREP_TYPE_NAMES_ARRAY
27
28#include "unicode/uchar.h"
29#include "unicode/putil.h"
30#include "cmemory.h"
31#include "cstring.h"
32#include "unicode/udata.h"
33#include "unicode/utf16.h"
34#include "unewdata.h"
35#include "uoptions.h"
36#include "uparse.h"
37#include "utrie.h"
38#include "umutex.h"
39#include "sprpimpl.h"
40#include "testidna.h"
41#include "punyref.h"
42#include <stdlib.h>
43
44UBool beVerbose=FALSE, haveCopyright=TRUE;
45
46/* prototypes --------------------------------------------------------------- */
47
48
49static void
50parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
51
52static void
53compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
54               UStringPrepType option);
55
56static void
57compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
58
59static void
60testAllCodepoints(TestIDNA& test);
61
62static TestIDNA* pTestIDNA =NULL;
63
64static const char* fileNames[] = {
65                                    "rfc3491.txt"
66                                 };
67static const UTrie *idnTrie              = NULL;
68static const int32_t *indexes            = NULL;
69static const uint16_t *mappingData       = NULL;
70/* -------------------------------------------------------------------------- */
71
72/* file definitions */
73#define DATA_TYPE "icu"
74
75#define SPREP_DIR "sprep"
76
77extern int
78testData(TestIDNA& test) {
79    char *basename=NULL;
80    UErrorCode errorCode=U_ZERO_ERROR;
81    char *saveBasename =NULL;
82
83    LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode));
84    if(U_FAILURE(errorCode)){
85        test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
86        return errorCode;
87    }
88
89    char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
90    //TODO get the srcDir dynamically
91    const char *srcDir=IntlTest::pathToDataDirectory();
92
93    idnTrie     = &profile->sprepTrie;
94    indexes     = profile->indexes;
95    mappingData = profile->mappingData;
96
97    //initialize
98    pTestIDNA = &test;
99
100    /* prepare the filename beginning with the source dir */
101    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
102        filename[0] = 0x2E;
103        filename[1] = U_FILE_SEP_CHAR;
104        uprv_strcpy(filename+2,srcDir);
105    }else{
106        uprv_strcpy(filename, srcDir);
107    }
108    basename=filename+uprv_strlen(filename);
109    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
110        *basename++=U_FILE_SEP_CHAR;
111    }
112
113    /* process unassigned */
114    basename=filename+uprv_strlen(filename);
115    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
116        *basename++=U_FILE_SEP_CHAR;
117    }
118
119    /* first copy misc directory */
120    saveBasename = basename;
121    (void)saveBasename;    // Suppress set but not used warning.
122    uprv_strcpy(basename,SPREP_DIR);
123    basename = basename + uprv_strlen(SPREP_DIR);
124    *basename++=U_FILE_SEP_CHAR;
125
126    /* process unassigned */
127    uprv_strcpy(basename,fileNames[0]);
128    parseMappings(filename,TRUE, test,&errorCode);
129    if(U_FAILURE(errorCode)) {
130        test.errln( "Could not open file %s for reading \n", filename);
131        return errorCode;
132    }
133
134    testAllCodepoints(test);
135
136    pTestIDNA = NULL;
137    free(filename);
138    return errorCode;
139}
140U_CDECL_BEGIN
141
142static void U_CALLCONV
143strprepProfileLineFn(void * /*context*/,
144              char *fields[][2], int32_t fieldCount,
145              UErrorCode *pErrorCode) {
146    uint32_t mapping[40];
147    char *end, *map;
148    uint32_t code;
149    int32_t length;
150   /*UBool* mapWithNorm = (UBool*) context;*/
151    const char* typeName;
152    uint32_t rangeStart=0,rangeEnd =0;
153    const char *s;
154
155    s = u_skipWhitespace(fields[0][0]);
156    if (*s == '@') {
157        /* a special directive introduced in 4.2 */
158        return;
159    }
160
161    if(fieldCount != 3){
162        *pErrorCode = U_INVALID_FORMAT_ERROR;
163        return;
164    }
165
166    typeName = fields[2][0];
167    map = fields[1][0];
168
169    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
170
171        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
172
173        /* store the range */
174        compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
175
176    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
177
178        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
179
180        /* store the range */
181        compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
182
183    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
184        /* get the character code, field 0 */
185        code=(uint32_t)uprv_strtoul(s, &end, 16);
186
187        /* parse the mapping string */
188        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
189
190        /* store the mapping */
191        compareMapping(code,mapping, length,USPREP_MAP);
192
193    }else{
194        *pErrorCode = U_INVALID_FORMAT_ERROR;
195    }
196
197}
198
199U_CDECL_END
200
201static void
202parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
203    char *fields[3][2];
204
205    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
206        return;
207    }
208
209    u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
210
211    //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
212
213    if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
214        test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
215    }
216}
217
218
219static inline UStringPrepType
220getValues(uint32_t result, int32_t& value, UBool& isIndex){
221
222    UStringPrepType type;
223
224    if(result == 0){
225        /*
226         * Initial value stored in the mapping table
227         * just return USPREP_TYPE_LIMIT .. so that
228         * the source codepoint is copied to the destination
229         */
230        type = USPREP_TYPE_LIMIT;
231        isIndex =FALSE;
232        value = 0;
233    }else if(result >= _SPREP_TYPE_THRESHOLD){
234        type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
235        isIndex =FALSE;
236        value = 0;
237    }else{
238        /* get the state */
239        type = USPREP_MAP;
240        /* ascertain if the value is index or delta */
241        if(result & 0x02){
242            isIndex = TRUE;
243            value = result  >> 2; //mask off the lower 2 bits and shift
244
245        }else{
246            isIndex = FALSE;
247            value = (int16_t)result;
248            value =  (value >> 2);
249
250        }
251        if((result>>2) == _SPREP_MAX_INDEX_VALUE){
252            type = USPREP_DELETE;
253            isIndex =FALSE;
254            value = 0;
255        }
256    }
257    return type;
258}
259
260
261
262static void
263testAllCodepoints(TestIDNA& test){
264    /*
265    {
266        UChar str[19] = {
267                            0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
268                            0x070F,//prohibited
269                            0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
270                        };
271        uint32_t in[19] = {0};
272        UErrorCode status = U_ZERO_ERROR;
273        int32_t inLength=0, outLength=100;
274        char output[100] = {0};
275        punycode_status error;
276        u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
277
278        error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
279        printf(output);
280
281    }
282    */
283
284    uint32_t i = 0;
285    int32_t unassigned      = 0;
286    int32_t prohibited      = 0;
287    int32_t mappedWithNorm  = 0;
288    int32_t mapped          = 0;
289    int32_t noValueInTrie   = 0;
290
291    UStringPrepType type;
292    int32_t value;
293    UBool isIndex = FALSE;
294
295    for(i=0;i<=0x10FFFF;i++){
296        uint32_t result = 0;
297        UTRIE_GET16(idnTrie,i, result);
298        type = getValues(result,value, isIndex);
299        if(type != USPREP_TYPE_LIMIT ){
300            if(type == USPREP_UNASSIGNED){
301                unassigned++;
302            }
303            if(type == USPREP_PROHIBITED){
304                prohibited++;
305            }
306            if(type == USPREP_MAP){
307                mapped++;
308            }
309        }else{
310            noValueInTrie++;
311            if(result > 0){
312                test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
313            }
314        }
315    }
316
317    test.logln("Number of Unassinged code points : %i \n",unassigned);
318    test.logln("Number of Prohibited code points : %i \n",prohibited);
319    test.logln("Number of Mapped code points : %i \n",mapped);
320    test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
321    test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
322
323
324}
325
326static void
327compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
328               UStringPrepType type){
329    uint32_t result = 0;
330    UTRIE_GET16(idnTrie,codepoint, result);
331
332    int32_t length=0;
333    UBool isIndex;
334    UStringPrepType retType;
335    int32_t value, index=0, delta=0;
336
337    retType = getValues(result,value,isIndex);
338
339
340    if(type != retType && retType != USPREP_DELETE){
341
342        pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
343
344    }
345
346    if(isIndex){
347        index = value;
348        if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
349                 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
350            length = 1;
351        }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
352                 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
353            length = 2;
354        }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
355                 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
356            length = 3;
357        }else{
358            length = mappingData[index++];
359        }
360    }else{
361        delta = value;
362        length = (retType == USPREP_DELETE)? 0 :  1;
363    }
364
365    int32_t realLength =0;
366    /* figure out the real length */
367    for(int32_t j=0; j<mapLength; j++){
368        if(mapping[j] > 0xFFFF){
369            realLength +=2;
370        }else{
371            realLength++;
372        }
373    }
374
375    if(realLength != length){
376        pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
377    }
378
379    if(isIndex){
380        for(int8_t i =0; i< mapLength; i++){
381            if(mapping[i] <= 0xFFFF){
382                if(mappingData[index+i] != (uint16_t)mapping[i]){
383                    pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
384                }
385            }else{
386                UChar lead  = U16_LEAD(mapping[i]);
387                UChar trail = U16_TRAIL(mapping[i]);
388                if(mappingData[index+i] != lead ||
389                    mappingData[index+i+1] != trail){
390                    pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
391                }
392            }
393        }
394    }else{
395        if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
396            pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
397        }
398    }
399
400}
401
402static void
403compareFlagsForRange(uint32_t start, uint32_t end,
404                     UStringPrepType type){
405
406    uint32_t result =0 ;
407    UStringPrepType retType;
408    UBool isIndex=FALSE;
409    int32_t value=0;
410/*
411    // supplementary code point
412    UChar __lead16=U16_LEAD(0x2323E);
413    int32_t __offset;
414
415    // get data for lead surrogate
416    (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
417    __offset=(&idnTrie)->getFoldingOffset(result);
418
419    // get the real data from the folded lead/trail units
420    if(__offset>0) {
421        (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
422    } else {
423        (result)=(uint32_t)((&idnTrie)->initialValue);
424    }
425
426    UTRIE_GET16(&idnTrie,0x2323E, result);
427*/
428    while(start < end+1){
429        UTRIE_GET16(idnTrie,start, result);
430        retType = getValues(result,value,isIndex);
431        if(result > _SPREP_TYPE_THRESHOLD){
432            if(retType != type){
433                pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
434            }
435        }else{
436            if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
437                pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
438            }
439        }
440
441        start++;
442    }
443
444}
445
446
447#endif /* #if !UCONFIG_NO_IDNA */
448
449/*
450 * Hey, Emacs, please set the following:
451 *
452 * Local Variables:
453 * indent-tabs-mode: nil
454 * End:
455 *
456 */
457