1/*
2*******************************************************************************
3*
4*   Copyright (C) 2003-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  testidn.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2003-02-06
14*   created by: Ram Viswanadha
15*
*   This test reads the rfc3491.txt source file,
*   parses it, and extracts the data for Nameprep conformance.
*   It then compares that data with the contents of the
*   RFC 3491 Nameprep profile that is loaded at run time.
20*/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
25
26#define USPREP_TYPE_NAMES_ARRAY
27
28#include "unicode/uchar.h"
29#include "unicode/putil.h"
30#include "cmemory.h"
31#include "cstring.h"
32#include "unicode/udata.h"
33#include "unicode/utf16.h"
34#include "unewdata.h"
35#include "uoptions.h"
36#include "uparse.h"
37#include "utrie.h"
38#include "umutex.h"
39#include "sprpimpl.h"
40#include "testidna.h"
41#include "punyref.h"
42#include <stdlib.h>
43
44UBool beVerbose=FALSE, haveCopyright=TRUE;
45
46/* prototypes --------------------------------------------------------------- */
47
48
49static void
50parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
51
52static void
53compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
54               UStringPrepType option);
55
56static void
57compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
58
59static void
60testAllCodepoints(TestIDNA& test);
61
62static TestIDNA* pTestIDNA =NULL;
63
64static const char* fileNames[] = {
65                                    "rfc3491.txt"
66                                 };
67static const UTrie *idnTrie              = NULL;
68static const int32_t *indexes            = NULL;
69static const uint16_t *mappingData       = NULL;
70/* -------------------------------------------------------------------------- */
71
72/* file definitions */
73#define DATA_TYPE "icu"
74
75#define SPREP_DIR "sprep"
76
77extern int
78testData(TestIDNA& test) {
79    char *basename=NULL;
80    UErrorCode errorCode=U_ZERO_ERROR;
81    char *saveBasename =NULL;
82
83    LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode));
84    if(U_FAILURE(errorCode)){
85        test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
86        return errorCode;
87    }
88
89    char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
90    //TODO get the srcDir dynamically
91    const char *srcDir=IntlTest::pathToDataDirectory();
92
93    idnTrie     = &profile->sprepTrie;
94    indexes     = profile->indexes;
95    mappingData = profile->mappingData;
96
97    //initialize
98    pTestIDNA = &test;
99
100    /* prepare the filename beginning with the source dir */
101    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
102        filename[0] = 0x2E;
103        filename[1] = U_FILE_SEP_CHAR;
104        uprv_strcpy(filename+2,srcDir);
105    }else{
106        uprv_strcpy(filename, srcDir);
107    }
108    basename=filename+uprv_strlen(filename);
109    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
110        *basename++=U_FILE_SEP_CHAR;
111    }
112
113    /* process unassigned */
114    basename=filename+uprv_strlen(filename);
115    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
116        *basename++=U_FILE_SEP_CHAR;
117    }
118
119    /* first copy misc directory */
120    saveBasename = basename;
121    uprv_strcpy(basename,SPREP_DIR);
122    basename = basename + uprv_strlen(SPREP_DIR);
123    *basename++=U_FILE_SEP_CHAR;
124
125    /* process unassigned */
126    uprv_strcpy(basename,fileNames[0]);
127    parseMappings(filename,TRUE, test,&errorCode);
128    if(U_FAILURE(errorCode)) {
129        test.errln( "Could not open file %s for reading \n", filename);
130        return errorCode;
131    }
132
133    testAllCodepoints(test);
134
135    pTestIDNA = NULL;
136    free(filename);
137    return errorCode;
138}
139U_CDECL_BEGIN
140
141static void U_CALLCONV
142strprepProfileLineFn(void * /*context*/,
143              char *fields[][2], int32_t fieldCount,
144              UErrorCode *pErrorCode) {
145    uint32_t mapping[40];
146    char *end, *map;
147    uint32_t code;
148    int32_t length;
149   /*UBool* mapWithNorm = (UBool*) context;*/
150    const char* typeName;
151    uint32_t rangeStart=0,rangeEnd =0;
152    const char *s;
153
154    s = u_skipWhitespace(fields[0][0]);
155    if (*s == '@') {
156        /* a special directive introduced in 4.2 */
157        return;
158    }
159
160    if(fieldCount != 3){
161        *pErrorCode = U_INVALID_FORMAT_ERROR;
162        return;
163    }
164
165    typeName = fields[2][0];
166    map = fields[1][0];
167
168    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
169
170        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
171
172        /* store the range */
173        compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
174
175    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
176
177        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
178
179        /* store the range */
180        compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
181
182    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
183        /* get the character code, field 0 */
184        code=(uint32_t)uprv_strtoul(s, &end, 16);
185
186        /* parse the mapping string */
187        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
188
189        /* store the mapping */
190        compareMapping(code,mapping, length,USPREP_MAP);
191
192    }else{
193        *pErrorCode = U_INVALID_FORMAT_ERROR;
194    }
195
196}
197
198U_CDECL_END
199
200static void
201parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
202    char *fields[3][2];
203
204    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
205        return;
206    }
207
208    u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
209
210    //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
211
212    if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
213        test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
214    }
215}
216
217
218static inline UStringPrepType
219getValues(uint32_t result, int32_t& value, UBool& isIndex){
220
221    UStringPrepType type;
222
223    if(result == 0){
224        /*
225         * Initial value stored in the mapping table
226         * just return USPREP_TYPE_LIMIT .. so that
227         * the source codepoint is copied to the destination
228         */
229        type = USPREP_TYPE_LIMIT;
230        isIndex =FALSE;
231        value = 0;
232    }else if(result >= _SPREP_TYPE_THRESHOLD){
233        type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
234        isIndex =FALSE;
235        value = 0;
236    }else{
237        /* get the state */
238        type = USPREP_MAP;
239        /* ascertain if the value is index or delta */
240        if(result & 0x02){
241            isIndex = TRUE;
242            value = result  >> 2; //mask off the lower 2 bits and shift
243
244        }else{
245            isIndex = FALSE;
246            value = (int16_t)result;
247            value =  (value >> 2);
248
249        }
250        if((result>>2) == _SPREP_MAX_INDEX_VALUE){
251            type = USPREP_DELETE;
252            isIndex =FALSE;
253            value = 0;
254        }
255    }
256    return type;
257}
258
259
260
261static void
262testAllCodepoints(TestIDNA& test){
263    /*
264    {
265        UChar str[19] = {
266                            0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
267                            0x070F,//prohibited
268                            0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
269                        };
270        uint32_t in[19] = {0};
271        UErrorCode status = U_ZERO_ERROR;
272        int32_t inLength=0, outLength=100;
273        char output[100] = {0};
274        punycode_status error;
275        u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
276
277        error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
278        printf(output);
279
280    }
281    */
282
283    uint32_t i = 0;
284    int32_t unassigned      = 0;
285    int32_t prohibited      = 0;
286    int32_t mappedWithNorm  = 0;
287    int32_t mapped          = 0;
288    int32_t noValueInTrie   = 0;
289
290    UStringPrepType type;
291    int32_t value;
292    UBool isIndex = FALSE;
293
294    for(i=0;i<=0x10FFFF;i++){
295        uint32_t result = 0;
296        UTRIE_GET16(idnTrie,i, result);
297        type = getValues(result,value, isIndex);
298        if(type != USPREP_TYPE_LIMIT ){
299            if(type == USPREP_UNASSIGNED){
300                unassigned++;
301            }
302            if(type == USPREP_PROHIBITED){
303                prohibited++;
304            }
305            if(type == USPREP_MAP){
306                mapped++;
307            }
308        }else{
309            noValueInTrie++;
310            if(result > 0){
311                test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
312            }
313        }
314    }
315
316    test.logln("Number of Unassinged code points : %i \n",unassigned);
317    test.logln("Number of Prohibited code points : %i \n",prohibited);
318    test.logln("Number of Mapped code points : %i \n",mapped);
319    test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
320    test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
321
322
323}
324
325static void
326compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
327               UStringPrepType type){
328    uint32_t result = 0;
329    UTRIE_GET16(idnTrie,codepoint, result);
330
331    int32_t length=0;
332    UBool isIndex;
333    UStringPrepType retType;
334    int32_t value, index=0, delta=0;
335
336    retType = getValues(result,value,isIndex);
337
338
339    if(type != retType && retType != USPREP_DELETE){
340
341        pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
342
343    }
344
345    if(isIndex){
346        index = value;
347        if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
348                 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
349            length = 1;
350        }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
351                 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
352            length = 2;
353        }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
354                 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
355            length = 3;
356        }else{
357            length = mappingData[index++];
358        }
359    }else{
360        delta = value;
361        length = (retType == USPREP_DELETE)? 0 :  1;
362    }
363
364    int32_t realLength =0;
365    /* figure out the real length */
366    for(int32_t j=0; j<mapLength; j++){
367        if(mapping[j] > 0xFFFF){
368            realLength +=2;
369        }else{
370            realLength++;
371        }
372    }
373
374    if(realLength != length){
375        pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
376    }
377
378    if(isIndex){
379        for(int8_t i =0; i< mapLength; i++){
380            if(mapping[i] <= 0xFFFF){
381                if(mappingData[index+i] != (uint16_t)mapping[i]){
382                    pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
383                }
384            }else{
385                UChar lead  = U16_LEAD(mapping[i]);
386                UChar trail = U16_TRAIL(mapping[i]);
387                if(mappingData[index+i] != lead ||
388                    mappingData[index+i+1] != trail){
389                    pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
390                }
391            }
392        }
393    }else{
394        if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
395            pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
396        }
397    }
398
399}
400
401static void
402compareFlagsForRange(uint32_t start, uint32_t end,
403                     UStringPrepType type){
404
405    uint32_t result =0 ;
406    UStringPrepType retType;
407    UBool isIndex=FALSE;
408    int32_t value=0;
409/*
410    // supplementary code point
411    UChar __lead16=U16_LEAD(0x2323E);
412    int32_t __offset;
413
414    // get data for lead surrogate
415    (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
416    __offset=(&idnTrie)->getFoldingOffset(result);
417
418    // get the real data from the folded lead/trail units
419    if(__offset>0) {
420        (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
421    } else {
422        (result)=(uint32_t)((&idnTrie)->initialValue);
423    }
424
425    UTRIE_GET16(&idnTrie,0x2323E, result);
426*/
427    while(start < end+1){
428        UTRIE_GET16(idnTrie,start, result);
429        retType = getValues(result,value,isIndex);
430        if(result > _SPREP_TYPE_THRESHOLD){
431            if(retType != type){
432                pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
433            }
434        }else{
435            if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
436                pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
437            }
438        }
439
440        start++;
441    }
442
443}
444
445
446#endif /* #if !UCONFIG_NO_IDNA */
447
448/*
449 * Hey, Emacs, please set the following:
450 *
451 * Local Variables:
452 * indent-tabs-mode: nil
453 * End:
454 *
455 */
456