1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
 *   file name:  nptrans.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2003feb1
16 *   created by: Ram Viswanadha
17 */
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_TRANSLITERATION
22#if !UCONFIG_NO_IDNA
23
24#include "nptrans.h"
25#include "unicode/resbund.h"
26#include "unicode/uniset.h"
27#include "sprpimpl.h"
28#include "cmemory.h"
29#include "ustr_imp.h"
30#include "intltest.h"
31
32#ifdef NPTRANS_DEBUG
33#include <stdio.h>
34#endif
35
36const char NamePrepTransform::fgClassID=0;
37
38//Factory method
39NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
40    NamePrepTransform* transform = new NamePrepTransform(parseError, status);
41    if(U_FAILURE(status)){
42        delete transform;
43        return NULL;
44    }
45    return transform;
46}
47
48//constructor
49NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
50: unassigned(), prohibited(), labelSeparatorSet(){
51
52    mapping = NULL;
53    bundle = NULL;
54
55
56    const char* testDataName = IntlTest::loadTestData(status);
57
58    if(U_FAILURE(status)){
59        return;
60    }
61
62    bundle = ures_openDirect(testDataName,"idna_rules",&status);
63
64    if(bundle != NULL && U_SUCCESS(status)){
65        // create the mapping transliterator
66        int32_t ruleLen = 0;
67        const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
68        int32_t mapRuleLen = 0;
69        const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
70        UnicodeString rule(mapRuleUChar, mapRuleLen);
71        rule.append(ruleUChar, ruleLen);
72
73        mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
74                                                   UTRANS_FORWARD, parseError,status);
75        if(U_FAILURE(status)) {
76          return;
77        }
78
79        //create the unassigned set
80        int32_t patternLen =0;
81        const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
82        unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
83
84        //create prohibited set
85        patternLen=0;
86        pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
87        UnicodeString test(pattern,patternLen);
88        prohibited.applyPattern(test,status);
89#ifdef NPTRANS_DEBUG
90        if(U_FAILURE(status)){
91            printf("Construction of Unicode set failed\n");
92        }
93
94        if(U_SUCCESS(status)){
95            if(prohibited.contains((UChar) 0x644)){
96                printf("The string contains 0x644 ... !!\n");
97            }
98            UnicodeString temp;
99            prohibited.toPattern(temp,TRUE);
100
101            for(int32_t i=0;i<temp.length();i++){
102                printf("%c", (char)temp.charAt(i));
103            }
104            printf("\n");
105        }
106#endif
107
108        //create label separator set
109        patternLen=0;
110        pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
111        labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
112    }
113
114    if(U_SUCCESS(status) &&
115        (mapping == NULL)
116      ){
117        status = U_MEMORY_ALLOCATION_ERROR;
118        delete mapping;
119        ures_close(bundle);
120        mapping = NULL;
121        bundle = NULL;
122    }
123
124}
125
126
127UBool NamePrepTransform::isProhibited(UChar32 ch){
128    return (UBool)(ch != ASCII_SPACE);
129}
130
131NamePrepTransform::~NamePrepTransform(){
132    delete mapping;
133    mapping = NULL;
134
135    //close the bundle
136    ures_close(bundle);
137    bundle = NULL;
138}
139
140
141int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
142                        UChar* dest, int32_t destCapacity,
143                        UBool allowUnassigned,
144                        UParseError* /*parseError*/,
145                        UErrorCode& status ){
146
147    if(U_FAILURE(status)){
148        return 0;
149    }
150    //check arguments
151    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
152        status=U_ILLEGAL_ARGUMENT_ERROR;
153        return 0;
154    }
155
156    UnicodeString rsource(src,srcLength);
157    // map the code points
158    // transliteration also performs NFKC
159    mapping->transliterate(rsource);
160
161    const UChar* buffer = rsource.getBuffer();
162    int32_t bufLen = rsource.length();
163    // check if unassigned
164    if(allowUnassigned == FALSE){
165        int32_t bufIndex=0;
166        UChar32 ch =0 ;
167        for(;bufIndex<bufLen;){
168            U16_NEXT(buffer, bufIndex, bufLen, ch);
169            if(unassigned.contains(ch)){
170                status = U_IDNA_UNASSIGNED_ERROR;
171                return 0;
172            }
173        }
174    }
175    // check if there is enough room in the output
176    if(bufLen < destCapacity){
177        u_memcpy(dest, buffer, bufLen);
178    }
179
180    return u_terminateUChars(dest, destCapacity, bufLen, &status);
181}
182
183
184#define MAX_BUFFER_SIZE 300
185
186int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
187                                    UChar* dest, int32_t destCapacity,
188                                    UBool allowUnassigned,
189                                    UParseError* parseError,
190                                    UErrorCode& status ){
191    // check error status
192    if(U_FAILURE(status)){
193        return 0;
194    }
195
196    //check arguments
197    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
198        status=U_ILLEGAL_ARGUMENT_ERROR;
199        return 0;
200    }
201
202    UnicodeString b1String;
203    UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
204    int32_t b1Len;
205
206    int32_t b1Index = 0;
207    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
208    UBool leftToRight=FALSE, rightToLeft=FALSE;
209
210    b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
211    b1String.releaseBuffer(b1Len);
212
213    if(status == U_BUFFER_OVERFLOW_ERROR){
214        // redo processing of string
215        /* we do not have enough room so grow the buffer*/
216        b1 = b1String.getBuffer(b1Len);
217        status = U_ZERO_ERROR; // reset error
218        b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
219        b1String.releaseBuffer(b1Len);
220    }
221
222    if(U_FAILURE(status)){
223        b1Len = 0;
224        goto CLEANUP;
225    }
226
227
228    for(; b1Index<b1Len; ){
229
230        UChar32 ch = 0;
231
232        U16_NEXT(b1, b1Index, b1Len, ch);
233
234        if(prohibited.contains(ch) && ch!=0x0020){
235            status = U_IDNA_PROHIBITED_ERROR;
236            b1Len = 0;
237            goto CLEANUP;
238        }
239
240        direction = u_charDirection(ch);
241        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
242            firstCharDir = direction;
243        }
244        if(direction == U_LEFT_TO_RIGHT){
245            leftToRight = TRUE;
246        }
247        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
248            rightToLeft = TRUE;
249        }
250    }
251
252    // satisfy 2
253    if( leftToRight == TRUE && rightToLeft == TRUE){
254        status = U_IDNA_CHECK_BIDI_ERROR;
255        b1Len = 0;
256        goto CLEANUP;
257    }
258
259    //satisfy 3
260    if( rightToLeft == TRUE &&
261        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
262          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
263       ){
264        status = U_IDNA_CHECK_BIDI_ERROR;
265        return FALSE;
266    }
267
268    if(b1Len <= destCapacity){
269        u_memmove(dest, b1, b1Len);
270    }
271
272CLEANUP:
273    return u_terminateUChars(dest, destCapacity, b1Len, &status);
274}
275
276UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
277    // check error status
278    if(U_FAILURE(status)){
279        return FALSE;
280    }
281
282    return labelSeparatorSet.contains(ch);
283}
284
285#endif /* #if !UCONFIG_NO_IDNA */
286#endif /* #if !UCONFIG_NO_TRANSLITERATION */
287