1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 2003-2013, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*   file name:  testidn.cpp
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 2003-02-06
16*   created by: Ram Viswanadha
17*
18*   This program reads the rfc3454_*.txt files,
19*   parses them, and extracts the data for Nameprep conformance.
20*   It then preprocesses it and writes a binary file for efficient use
21*   in various IDNA conversion processes.
22*/
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
27
28#define USPREP_TYPE_NAMES_ARRAY
29
30#include "unicode/uchar.h"
31#include "unicode/putil.h"
32#include "cmemory.h"
33#include "cstring.h"
34#include "unicode/udata.h"
35#include "unicode/utf16.h"
36#include "unewdata.h"
37#include "uoptions.h"
38#include "uparse.h"
39#include "utrie.h"
40#include "umutex.h"
41#include "sprpimpl.h"
42#include "testidna.h"
43#include "punyref.h"
44#include <stdlib.h>
45
46UBool beVerbose=FALSE, haveCopyright=TRUE;
47
48/* prototypes --------------------------------------------------------------- */
49
50
51static void
52parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
53
54static void
55compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
56               UStringPrepType option);
57
58static void
59compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
60
61static void
62testAllCodepoints(TestIDNA& test);
63
64static TestIDNA* pTestIDNA =NULL;
65
66static const char* fileNames[] = {
67                                    "rfc3491.txt"
68                                 };
69static const UTrie *idnTrie              = NULL;
70static const int32_t *indexes            = NULL;
71static const uint16_t *mappingData       = NULL;
72/* -------------------------------------------------------------------------- */
73
74/* file definitions */
75#define DATA_TYPE "icu"
76
77#define SPREP_DIR "sprep"
78
79extern int
80testData(TestIDNA& test) {
81    char *basename=NULL;
82    UErrorCode errorCode=U_ZERO_ERROR;
83    char *saveBasename =NULL;
84
85    LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode));
86    if(U_FAILURE(errorCode)){
87        test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
88        return errorCode;
89    }
90
91    char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
92    //TODO get the srcDir dynamically
93    const char *srcDir=IntlTest::pathToDataDirectory();
94
95    idnTrie     = &profile->sprepTrie;
96    indexes     = profile->indexes;
97    mappingData = profile->mappingData;
98
99    //initialize
100    pTestIDNA = &test;
101
102    /* prepare the filename beginning with the source dir */
103    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
104        filename[0] = 0x2E;
105        filename[1] = U_FILE_SEP_CHAR;
106        uprv_strcpy(filename+2,srcDir);
107    }else{
108        uprv_strcpy(filename, srcDir);
109    }
110    basename=filename+uprv_strlen(filename);
111    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
112        *basename++=U_FILE_SEP_CHAR;
113    }
114
115    /* process unassigned */
116    basename=filename+uprv_strlen(filename);
117    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
118        *basename++=U_FILE_SEP_CHAR;
119    }
120
121    /* first copy misc directory */
122    saveBasename = basename;
123    (void)saveBasename;    // Suppress set but not used warning.
124    uprv_strcpy(basename,SPREP_DIR);
125    basename = basename + uprv_strlen(SPREP_DIR);
126    *basename++=U_FILE_SEP_CHAR;
127
128    /* process unassigned */
129    uprv_strcpy(basename,fileNames[0]);
130    parseMappings(filename,TRUE, test,&errorCode);
131    if(U_FAILURE(errorCode)) {
132        test.errln( "Could not open file %s for reading \n", filename);
133        return errorCode;
134    }
135
136    testAllCodepoints(test);
137
138    pTestIDNA = NULL;
139    free(filename);
140    return errorCode;
141}
142U_CDECL_BEGIN
143
144static void U_CALLCONV
145strprepProfileLineFn(void * /*context*/,
146              char *fields[][2], int32_t fieldCount,
147              UErrorCode *pErrorCode) {
148    uint32_t mapping[40];
149    char *end, *map;
150    uint32_t code;
151    int32_t length;
152   /*UBool* mapWithNorm = (UBool*) context;*/
153    const char* typeName;
154    uint32_t rangeStart=0,rangeEnd =0;
155    const char *s;
156
157    s = u_skipWhitespace(fields[0][0]);
158    if (*s == '@') {
159        /* a special directive introduced in 4.2 */
160        return;
161    }
162
163    if(fieldCount != 3){
164        *pErrorCode = U_INVALID_FORMAT_ERROR;
165        return;
166    }
167
168    typeName = fields[2][0];
169    map = fields[1][0];
170
171    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
172
173        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
174
175        /* store the range */
176        compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
177
178    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
179
180        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
181
182        /* store the range */
183        compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
184
185    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
186        /* get the character code, field 0 */
187        code=(uint32_t)uprv_strtoul(s, &end, 16);
188
189        /* parse the mapping string */
190        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
191
192        /* store the mapping */
193        compareMapping(code,mapping, length,USPREP_MAP);
194
195    }else{
196        *pErrorCode = U_INVALID_FORMAT_ERROR;
197    }
198
199}
200
201U_CDECL_END
202
203static void
204parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
205    char *fields[3][2];
206
207    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
208        return;
209    }
210
211    u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
212
213    //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
214
215    if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
216        test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
217    }
218}
219
220
221static inline UStringPrepType
222getValues(uint32_t result, int32_t& value, UBool& isIndex){
223
224    UStringPrepType type;
225
226    if(result == 0){
227        /*
228         * Initial value stored in the mapping table
229         * just return USPREP_TYPE_LIMIT .. so that
230         * the source codepoint is copied to the destination
231         */
232        type = USPREP_TYPE_LIMIT;
233        isIndex =FALSE;
234        value = 0;
235    }else if(result >= _SPREP_TYPE_THRESHOLD){
236        type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
237        isIndex =FALSE;
238        value = 0;
239    }else{
240        /* get the state */
241        type = USPREP_MAP;
242        /* ascertain if the value is index or delta */
243        if(result & 0x02){
244            isIndex = TRUE;
245            value = result  >> 2; //mask off the lower 2 bits and shift
246
247        }else{
248            isIndex = FALSE;
249            value = (int16_t)result;
250            value =  (value >> 2);
251
252        }
253        if((result>>2) == _SPREP_MAX_INDEX_VALUE){
254            type = USPREP_DELETE;
255            isIndex =FALSE;
256            value = 0;
257        }
258    }
259    return type;
260}
261
262
263
264static void
265testAllCodepoints(TestIDNA& test){
266    /*
267    {
268        UChar str[19] = {
269                            0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
270                            0x070F,//prohibited
271                            0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
272                        };
273        uint32_t in[19] = {0};
274        UErrorCode status = U_ZERO_ERROR;
275        int32_t inLength=0, outLength=100;
276        char output[100] = {0};
277        punycode_status error;
278        u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
279
280        error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
281        printf(output);
282
283    }
284    */
285
286    uint32_t i = 0;
287    int32_t unassigned      = 0;
288    int32_t prohibited      = 0;
289    int32_t mappedWithNorm  = 0;
290    int32_t mapped          = 0;
291    int32_t noValueInTrie   = 0;
292
293    UStringPrepType type;
294    int32_t value;
295    UBool isIndex = FALSE;
296
297    for(i=0;i<=0x10FFFF;i++){
298        uint32_t result = 0;
299        UTRIE_GET16(idnTrie,i, result);
300        type = getValues(result,value, isIndex);
301        if(type != USPREP_TYPE_LIMIT ){
302            if(type == USPREP_UNASSIGNED){
303                unassigned++;
304            }
305            if(type == USPREP_PROHIBITED){
306                prohibited++;
307            }
308            if(type == USPREP_MAP){
309                mapped++;
310            }
311        }else{
312            noValueInTrie++;
313            if(result > 0){
314                test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
315            }
316        }
317    }
318
319    test.logln("Number of Unassinged code points : %i \n",unassigned);
320    test.logln("Number of Prohibited code points : %i \n",prohibited);
321    test.logln("Number of Mapped code points : %i \n",mapped);
322    test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
323    test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
324
325
326}
327
328static void
329compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
330               UStringPrepType type){
331    uint32_t result = 0;
332    UTRIE_GET16(idnTrie,codepoint, result);
333
334    int32_t length=0;
335    UBool isIndex;
336    UStringPrepType retType;
337    int32_t value, index=0, delta=0;
338
339    retType = getValues(result,value,isIndex);
340
341
342    if(type != retType && retType != USPREP_DELETE){
343
344        pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
345
346    }
347
348    if(isIndex){
349        index = value;
350        if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
351                 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
352            length = 1;
353        }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
354                 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
355            length = 2;
356        }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
357                 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
358            length = 3;
359        }else{
360            length = mappingData[index++];
361        }
362    }else{
363        delta = value;
364        length = (retType == USPREP_DELETE)? 0 :  1;
365    }
366
367    int32_t realLength =0;
368    /* figure out the real length */
369    for(int32_t j=0; j<mapLength; j++){
370        if(mapping[j] > 0xFFFF){
371            realLength +=2;
372        }else{
373            realLength++;
374        }
375    }
376
377    if(realLength != length){
378        pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
379    }
380
381    if(isIndex){
382        for(int8_t i =0; i< mapLength; i++){
383            if(mapping[i] <= 0xFFFF){
384                if(mappingData[index+i] != (uint16_t)mapping[i]){
385                    pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
386                }
387            }else{
388                UChar lead  = U16_LEAD(mapping[i]);
389                UChar trail = U16_TRAIL(mapping[i]);
390                if(mappingData[index+i] != lead ||
391                    mappingData[index+i+1] != trail){
392                    pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
393                }
394            }
395        }
396    }else{
397        if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
398            pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
399        }
400    }
401
402}
403
404static void
405compareFlagsForRange(uint32_t start, uint32_t end,
406                     UStringPrepType type){
407
408    uint32_t result =0 ;
409    UStringPrepType retType;
410    UBool isIndex=FALSE;
411    int32_t value=0;
412/*
413    // supplementary code point
414    UChar __lead16=U16_LEAD(0x2323E);
415    int32_t __offset;
416
417    // get data for lead surrogate
418    (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
419    __offset=(&idnTrie)->getFoldingOffset(result);
420
421    // get the real data from the folded lead/trail units
422    if(__offset>0) {
423        (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
424    } else {
425        (result)=(uint32_t)((&idnTrie)->initialValue);
426    }
427
428    UTRIE_GET16(&idnTrie,0x2323E, result);
429*/
430    while(start < end+1){
431        UTRIE_GET16(idnTrie,start, result);
432        retType = getValues(result,value,isIndex);
433        if(result > _SPREP_TYPE_THRESHOLD){
434            if(retType != type){
435                pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
436            }
437        }else{
438            if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
439                pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
440            }
441        }
442
443        start++;
444    }
445
446}
447
448
449#endif /* #if !UCONFIG_NO_IDNA */
450
451/*
452 * Hey, Emacs, please set the following:
453 *
454 * Local Variables:
455 * indent-tabs-mode: nil
456 * End:
457 *
458 */
459