1d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen/*
2d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *******************************************************************************
3d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *
4d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *   Copyright (C) 2003-2010, International Business Machines
5d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *   Corporation and others.  All Rights Reserved.
6d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *
7d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *******************************************************************************
 *   file name:  nptrans.cpp
9d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *   encoding:   US-ASCII
10d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *   tab size:   8 (not used)
11d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *   indentation:4
12d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *
13d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *   created on: 2003feb1
14d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen *   created by: Ram Viswanadha
15d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen */
16d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
17d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "unicode/utypes.h"
18d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
19d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#if !UCONFIG_NO_TRANSLITERATION
20d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#if !UCONFIG_NO_IDNA
21d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
22d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "nptrans.h"
23d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "unicode/resbund.h"
24d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "unicode/uniset.h"
25d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "sprpimpl.h"
26d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "cmemory.h"
27d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "ustr_imp.h"
28d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include "intltest.h"
29d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
30d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#ifdef DEBUG
31d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#include <stdio.h>
32d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen#endif
33d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
// Out-of-class definition of the static member NamePrepTransform::fgClassID, initialized to 0.
// NOTE(review): presumably only the address of this byte is used as a unique class
// identifier -- confirm against the declaration in nptrans.h.
const char NamePrepTransform::fgClassID=0;
35d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
36d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen//Factory method
37d7955ce24d294fb2014c59d11fca184471056f44Shuyi ChenNamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
38d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen    NamePrepTransform* transform = new NamePrepTransform(parseError, status);
39d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen    if(U_FAILURE(status)){
40d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen        delete transform;
41d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen        return NULL;
42d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen    }
43d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen    return transform;
44d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen}
45d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
46d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen//constructor
47d7955ce24d294fb2014c59d11fca184471056f44Shuyi ChenNamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
48d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen: unassigned(), prohibited(), labelSeparatorSet(){
49d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
50d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen    mapping = NULL;
51d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen    bundle = NULL;
52d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
53d7955ce24d294fb2014c59d11fca184471056f44Shuyi Chen
54    const char* testDataName = IntlTest::loadTestData(status);
55
56    if(U_FAILURE(status)){
57        return;
58    }
59
60    bundle = ures_openDirect(testDataName,"idna_rules",&status);
61
62    if(bundle != NULL && U_SUCCESS(status)){
63        // create the mapping transliterator
64        int32_t ruleLen = 0;
65        const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
66        int32_t mapRuleLen = 0;
67        const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
68        UnicodeString rule(mapRuleUChar, mapRuleLen);
69        rule.append(ruleUChar, ruleLen);
70
71        mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
72                                                   UTRANS_FORWARD, parseError,status);
73        if(U_FAILURE(status)) {
74          return;
75        }
76
77        //create the unassigned set
78        int32_t patternLen =0;
79        const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
80        unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
81
82        //create prohibited set
83        patternLen=0;
84        pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
85        UnicodeString test(pattern,patternLen);
86        prohibited.applyPattern(test,status);
87#ifdef DEBUG
88        if(U_FAILURE(status)){
89            printf("Construction of Unicode set failed\n");
90        }
91
92        if(U_SUCCESS(status)){
93            if(prohibited.contains((UChar) 0x644)){
94                printf("The string contains 0x644 ... damn !!\n");
95            }
96            UnicodeString temp;
97            prohibited.toPattern(temp,TRUE);
98
99            for(int32_t i=0;i<temp.length();i++){
100                printf("%c", (char)temp.charAt(i));
101            }
102            printf("\n");
103        }
104#endif
105
106        //create label separator set
107        patternLen=0;
108        pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
109        labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
110    }
111
112    if(U_SUCCESS(status) &&
113        (mapping == NULL)
114      ){
115        status = U_MEMORY_ALLOCATION_ERROR;
116        delete mapping;
117        ures_close(bundle);
118        mapping = NULL;
119        bundle = NULL;
120    }
121
122}
123
124
125UBool NamePrepTransform::isProhibited(UChar32 ch){
126    return (UBool)(ch != ASCII_SPACE);
127}
128
129NamePrepTransform::~NamePrepTransform(){
130    delete mapping;
131    mapping = NULL;
132
133    //close the bundle
134    ures_close(bundle);
135    bundle = NULL;
136}
137
138
139int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
140                        UChar* dest, int32_t destCapacity,
141                        UBool allowUnassigned,
142                        UParseError* /*parseError*/,
143                        UErrorCode& status ){
144
145    if(U_FAILURE(status)){
146        return 0;
147    }
148    //check arguments
149    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
150        status=U_ILLEGAL_ARGUMENT_ERROR;
151        return 0;
152    }
153
154    UnicodeString rsource(src,srcLength);
155    // map the code points
156    // transliteration also performs NFKC
157    mapping->transliterate(rsource);
158
159    const UChar* buffer = rsource.getBuffer();
160    int32_t bufLen = rsource.length();
161    // check if unassigned
162    if(allowUnassigned == FALSE){
163        int32_t bufIndex=0;
164        UChar32 ch =0 ;
165        for(;bufIndex<bufLen;){
166            U16_NEXT(buffer, bufIndex, bufLen, ch);
167            if(unassigned.contains(ch)){
168                status = U_IDNA_UNASSIGNED_ERROR;
169                return 0;
170            }
171        }
172    }
173    // check if there is enough room in the output
174    if(bufLen < destCapacity){
175        uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR);
176    }
177
178    return u_terminateUChars(dest, destCapacity, bufLen, &status);
179}
180
181
182#define MAX_BUFFER_SIZE 300
183
184int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
185                                    UChar* dest, int32_t destCapacity,
186                                    UBool allowUnassigned,
187                                    UParseError* parseError,
188                                    UErrorCode& status ){
189    // check error status
190    if(U_FAILURE(status)){
191        return 0;
192    }
193
194    //check arguments
195    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
196        status=U_ILLEGAL_ARGUMENT_ERROR;
197        return 0;
198    }
199
200    UnicodeString b1String;
201    UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
202    int32_t b1Len;
203
204    int32_t b1Index = 0;
205    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
206    UBool leftToRight=FALSE, rightToLeft=FALSE;
207
208    b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
209    b1String.releaseBuffer(b1Len);
210
211    if(status == U_BUFFER_OVERFLOW_ERROR){
212        // redo processing of string
213        /* we do not have enough room so grow the buffer*/
214        b1 = b1String.getBuffer(b1Len);
215        status = U_ZERO_ERROR; // reset error
216        b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
217        b1String.releaseBuffer(b1Len);
218    }
219
220    if(U_FAILURE(status)){
221        b1Len = 0;
222        goto CLEANUP;
223    }
224
225
226    for(; b1Index<b1Len; ){
227
228        UChar32 ch = 0;
229
230        U16_NEXT(b1, b1Index, b1Len, ch);
231
232        if(prohibited.contains(ch) && ch!=0x0020){
233            status = U_IDNA_PROHIBITED_ERROR;
234            b1Len = 0;
235            goto CLEANUP;
236        }
237
238        direction = u_charDirection(ch);
239        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
240            firstCharDir = direction;
241        }
242        if(direction == U_LEFT_TO_RIGHT){
243            leftToRight = TRUE;
244        }
245        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
246            rightToLeft = TRUE;
247        }
248    }
249
250    // satisfy 2
251    if( leftToRight == TRUE && rightToLeft == TRUE){
252        status = U_IDNA_CHECK_BIDI_ERROR;
253        b1Len = 0;
254        goto CLEANUP;
255    }
256
257    //satisfy 3
258    if( rightToLeft == TRUE &&
259        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
260          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
261       ){
262        status = U_IDNA_CHECK_BIDI_ERROR;
263        return FALSE;
264    }
265
266    if(b1Len <= destCapacity){
267        uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
268    }
269
270CLEANUP:
271    return u_terminateUChars(dest, destCapacity, b1Len, &status);
272}
273
274UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
275    // check error status
276    if(U_FAILURE(status)){
277        return FALSE;
278    }
279
280    return labelSeparatorSet.contains(ch);
281}
282
283#endif /* #if !UCONFIG_NO_IDNA */
284#endif /* #if !UCONFIG_NO_TRANSLITERATION */
285