1/*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2003-2009, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  uidna.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2003feb1
14 *   created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_IDNA
20
21#include "unicode/uidna.h"
22#include "unicode/ustring.h"
23#include "unicode/usprep.h"
24#include "punycode.h"
25#include "ustr_imp.h"
26#include "cmemory.h"
27#include "uassert.h"
28#include "sprpimpl.h"
29
30/* it is official IDNA ACE Prefix is "xn--" */
31static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
32#define ACE_PREFIX_LENGTH 4
33
/* A single label must not be longer than MAX_LABEL_LENGTH code units. */
#define MAX_LABEL_LENGTH 63
/* Size of the on-stack work buffers used while processing one label. */
#define MAX_LABEL_BUFFER_SIZE 100

/* A complete domain name must not be longer than MAX_DOMAIN_NAME_LENGTH code units. */
#define MAX_DOMAIN_NAME_LENGTH 255
/*
 * Size of the on-stack work buffers used for whole domain names: one unit
 * more than the longest permitted name.
 * The expansion is parenthesized so that the macro can be used safely inside
 * larger expressions (e.g. 2 * MAX_IDN_BUFFER_SIZE).
 */
#define MAX_IDN_BUFFER_SIZE   (MAX_DOMAIN_NAME_LENGTH+1)

/* Distance between an ASCII capital letter and its lower-case form. */
#define LOWER_CASE_DELTA 0x0020
#define HYPHEN           0x002D /* '-' */
#define FULL_STOP        0x002E /* '.' */
#define CAPITAL_A        0x0041 /* 'A' */
#define CAPITAL_Z        0x005A /* 'Z' */
47
48inline static UChar
49toASCIILower(UChar ch){
50    if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
51        return ch + LOWER_CASE_DELTA;
52    }
53    return ch;
54}
55
56inline static UBool
57startsWithPrefix(const UChar* src , int32_t srcLength){
58    UBool startsWithPrefix = TRUE;
59
60    if(srcLength < ACE_PREFIX_LENGTH){
61        return FALSE;
62    }
63
64    for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
65        if(toASCIILower(src[i]) != ACE_PREFIX[i]){
66            startsWithPrefix = FALSE;
67        }
68    }
69    return startsWithPrefix;
70}
71
72
73inline static int32_t
74compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
75                            const UChar* s2, int32_t s2Len){
76
77    int32_t minLength;
78    int32_t lengthResult;
79
80    // are we comparing different lengths?
81    if(s1Len != s2Len) {
82        if(s1Len < s2Len) {
83            minLength = s1Len;
84            lengthResult = -1;
85        } else {
86            minLength = s2Len;
87            lengthResult = 1;
88        }
89    } else {
90        // ok the lengths are equal
91        minLength = s1Len;
92        lengthResult = 0;
93    }
94
95    UChar c1,c2;
96    int32_t rc;
97
98    for(int32_t i =0;/* no condition */;i++) {
99
100        /* If we reach the ends of both strings then they match */
101        if(i == minLength) {
102            return lengthResult;
103        }
104
105        c1 = s1[i];
106        c2 = s2[i];
107
108        /* Case-insensitive comparison */
109        if(c1!=c2) {
110            rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
111            if(rc!=0) {
112                lengthResult=rc;
113                break;
114            }
115        }
116    }
117    return lengthResult;
118}
119
120
121/**
122 * Ascertain if the given code point is a label separator as
123 * defined by the IDNA RFC
124 *
125 * @param ch The code point to be ascertained
126 * @return true if the char is a label separator
127 * @stable ICU 2.8
128 */
129static inline UBool isLabelSeparator(UChar ch){
130    switch(ch){
131        case 0x002e:
132        case 0x3002:
133        case 0xFF0E:
134        case 0xFF61:
135            return TRUE;
136        default:
137            return FALSE;
138    }
139}
140
141// returns the length of the label excluding the separator
142// if *limit == separator then the length returned does not include
143// the separtor.
144static inline int32_t
145getNextSeparator(UChar *src, int32_t srcLength,
146                 UChar **limit, UBool *done){
147    if(srcLength == -1){
148        int32_t i;
149        for(i=0 ; ;i++){
150            if(src[i] == 0){
151                *limit = src + i; // point to null
152                *done = TRUE;
153                return i;
154            }
155            if(isLabelSeparator(src[i])){
156                *limit = src + (i+1); // go past the delimiter
157                return i;
158
159            }
160        }
161    }else{
162        int32_t i;
163        for(i=0;i<srcLength;i++){
164            if(isLabelSeparator(src[i])){
165                *limit = src + (i+1); // go past the delimiter
166                return i;
167            }
168        }
169        // we have not found the delimiter
170        // if(i==srcLength)
171        *limit = src+srcLength;
172        *done = TRUE;
173
174        return i;
175    }
176}
177static inline UBool isLDHChar(UChar ch){
178    // high runner case
179    if(ch>0x007A){
180        return FALSE;
181    }
182    //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
183    if( (ch==0x002D) ||
184        (0x0030 <= ch && ch <= 0x0039) ||
185        (0x0041 <= ch && ch <= 0x005A) ||
186        (0x0061 <= ch && ch <= 0x007A)
187      ){
188        return TRUE;
189    }
190    return FALSE;
191}
192
193static int32_t
194_internal_toASCII(const UChar* src, int32_t srcLength,
195                  UChar* dest, int32_t destCapacity,
196                  int32_t options,
197                  UStringPrepProfile* nameprep,
198                  UParseError* parseError,
199                  UErrorCode* status)
200{
201
202    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
203    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
204    //initialize pointers to stack buffers
205    UChar  *b1 = b1Stack, *b2 = b2Stack;
206    int32_t b1Len=0, b2Len,
207            b1Capacity = MAX_LABEL_BUFFER_SIZE,
208            b2Capacity = MAX_LABEL_BUFFER_SIZE ,
209            reqLength=0;
210
211    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
212    UBool* caseFlags = NULL;
213
214    // the source contains all ascii codepoints
215    UBool srcIsASCII  = TRUE;
216    // assume the source contains all LDH codepoints
217    UBool srcIsLDH = TRUE;
218
219    int32_t j=0;
220
221    //get the options
222    UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
223
224    int32_t failPos = -1;
225
226    if(srcLength == -1){
227        srcLength = u_strlen(src);
228    }
229
230    if(srcLength > b1Capacity){
231        b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
232        if(b1==NULL){
233            *status = U_MEMORY_ALLOCATION_ERROR;
234            goto CLEANUP;
235        }
236        b1Capacity = srcLength;
237    }
238
239    // step 1
240    for( j=0;j<srcLength;j++){
241        if(src[j] > 0x7F){
242            srcIsASCII = FALSE;
243        }
244        b1[b1Len++] = src[j];
245    }
246
247    // step 2 is performed only if the source contains non ASCII
248    if(srcIsASCII == FALSE){
249
250        // step 2
251        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
252
253        if(*status == U_BUFFER_OVERFLOW_ERROR){
254            // redo processing of string
255            // we do not have enough room so grow the buffer
256            if(b1 != b1Stack){
257                uprv_free(b1);
258            }
259            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
260            if(b1==NULL){
261                *status = U_MEMORY_ALLOCATION_ERROR;
262                goto CLEANUP;
263            }
264
265            *status = U_ZERO_ERROR; // reset error
266
267            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
268        }
269    }
270    // error bail out
271    if(U_FAILURE(*status)){
272        goto CLEANUP;
273    }
274    if(b1Len == 0){
275        *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
276        goto CLEANUP;
277    }
278
279    // for step 3 & 4
280    srcIsASCII = TRUE;
281    for( j=0;j<b1Len;j++){
282        // check if output of usprep_prepare is all ASCII
283        if(b1[j] > 0x7F){
284            srcIsASCII = FALSE;
285        }else if(isLDHChar(b1[j])==FALSE){  // if the char is in ASCII range verify that it is an LDH character
286            srcIsLDH = FALSE;
287            failPos = j;
288        }
289    }
290    if(useSTD3ASCIIRules == TRUE){
291        // verify 3a and 3b
292        // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
293        //  absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
294        // 3(b) Verify the absence of leading and trailing hyphen-minus; that
295        //  is, the absence of U+002D at the beginning and end of the
296        //  sequence.
297        if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */
298            || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
299            *status = U_IDNA_STD3_ASCII_RULES_ERROR;
300
301            /* populate the parseError struct */
302            if(srcIsLDH==FALSE){
303                // failPos is always set the index of failure
304                uprv_syntaxError(b1,failPos, b1Len,parseError);
305            }else if(b1[0] == HYPHEN){
306                // fail position is 0
307                uprv_syntaxError(b1,0,b1Len,parseError);
308            }else{
309                // the last index in the source is always length-1
310                uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
311            }
312
313            goto CLEANUP;
314        }
315    }
316    // Step 4: if the source is ASCII then proceed to step 8
317    if(srcIsASCII){
318        if(b1Len <= destCapacity){
319            uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
320            reqLength = b1Len;
321        }else{
322            reqLength = b1Len;
323            goto CLEANUP;
324        }
325    }else{
326        // step 5 : verify the sequence does not begin with ACE prefix
327        if(!startsWithPrefix(b1,b1Len)){
328
329            //step 6: encode the sequence with punycode
330
331            // do not preserve the case flags for now!
332            // TODO: Preserve the case while implementing the RFE
333            // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
334            // uprv_memset(caseFlags,TRUE,b1Len);
335
336            b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
337
338            if(*status == U_BUFFER_OVERFLOW_ERROR){
339                // redo processing of string
340                /* we do not have enough room so grow the buffer*/
341                b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
342                if(b2 == NULL){
343                    *status = U_MEMORY_ALLOCATION_ERROR;
344                    goto CLEANUP;
345                }
346
347                *status = U_ZERO_ERROR; // reset error
348
349                b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
350            }
351            //error bail out
352            if(U_FAILURE(*status)){
353                goto CLEANUP;
354            }
355            // TODO : Reconsider while implementing the case preserve RFE
356            // convert all codepoints to lower case ASCII
357            // toASCIILower(b2,b2Len);
358            reqLength = b2Len+ACE_PREFIX_LENGTH;
359
360            if(reqLength > destCapacity){
361                *status = U_BUFFER_OVERFLOW_ERROR;
362                goto CLEANUP;
363            }
364            //Step 7: prepend the ACE prefix
365            uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
366            //Step 6: copy the contents in b2 into dest
367            uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
368
369        }else{
370            *status = U_IDNA_ACE_PREFIX_ERROR;
371            //position of failure is 0
372            uprv_syntaxError(b1,0,b1Len,parseError);
373            goto CLEANUP;
374        }
375    }
376    // step 8: verify the length of label
377    if(reqLength > MAX_LABEL_LENGTH){
378        *status = U_IDNA_LABEL_TOO_LONG_ERROR;
379    }
380
381CLEANUP:
382    if(b1 != b1Stack){
383        uprv_free(b1);
384    }
385    if(b2 != b2Stack){
386        uprv_free(b2);
387    }
388    uprv_free(caseFlags);
389
390    return u_terminateUChars(dest, destCapacity, reqLength, status);
391}
392
393static int32_t
394_internal_toUnicode(const UChar* src, int32_t srcLength,
395                    UChar* dest, int32_t destCapacity,
396                    int32_t options,
397                    UStringPrepProfile* nameprep,
398                    UParseError* parseError,
399                    UErrorCode* status)
400{
401
402    //get the options
403    //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
404    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
405
406    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
407    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
408
409    //initialize pointers to stack buffers
410    UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
411    int32_t b1Len, b2Len, b1PrimeLen, b3Len,
412            b1Capacity = MAX_LABEL_BUFFER_SIZE,
413            b2Capacity = MAX_LABEL_BUFFER_SIZE,
414            b3Capacity = MAX_LABEL_BUFFER_SIZE,
415            reqLength=0;
416
417    b1Len = 0;
418    UBool* caseFlags = NULL;
419
420    UBool srcIsASCII = TRUE;
421    /*UBool srcIsLDH = TRUE;
422    int32_t failPos =0;*/
423
424    // step 1: find out if all the codepoints in src are ASCII
425    if(srcLength==-1){
426        srcLength = 0;
427        for(;src[srcLength]!=0;){
428            if(src[srcLength]> 0x7f){
429                srcIsASCII = FALSE;
430            }/*else if(isLDHChar(src[srcLength])==FALSE){
431                // here we do not assemble surrogates
432                // since we know that LDH code points
433                // are in the ASCII range only
434                srcIsLDH = FALSE;
435                failPos = srcLength;
436            }*/
437            srcLength++;
438        }
439    }else if(srcLength > 0){
440        for(int32_t j=0; j<srcLength; j++){
441            if(src[j]> 0x7f){
442                srcIsASCII = FALSE;
443            }/*else if(isLDHChar(src[j])==FALSE){
444                // here we do not assemble surrogates
445                // since we know that LDH code points
446                // are in the ASCII range only
447                srcIsLDH = FALSE;
448                failPos = j;
449            }*/
450        }
451    }else{
452        return 0;
453    }
454
455    if(srcIsASCII == FALSE){
456        // step 2: process the string
457        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
458        if(*status == U_BUFFER_OVERFLOW_ERROR){
459            // redo processing of string
460            /* we do not have enough room so grow the buffer*/
461            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
462            if(b1==NULL){
463                *status = U_MEMORY_ALLOCATION_ERROR;
464                goto CLEANUP;
465            }
466
467            *status = U_ZERO_ERROR; // reset error
468
469            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
470        }
471        //bail out on error
472        if(U_FAILURE(*status)){
473            goto CLEANUP;
474        }
475    }else{
476
477        //just point src to b1
478        b1 = (UChar*) src;
479        b1Len = srcLength;
480    }
481
482    // The RFC states that
483    // <quote>
484    // ToUnicode never fails. If any step fails, then the original input
485    // is returned immediately in that step.
486    // </quote>
487
488    //step 3: verify ACE Prefix
489    if(startsWithPrefix(b1,b1Len)){
490
491        //step 4: Remove the ACE Prefix
492        b1Prime = b1 + ACE_PREFIX_LENGTH;
493        b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
494
495        //step 5: Decode using punycode
496        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
497
498        if(*status == U_BUFFER_OVERFLOW_ERROR){
499            // redo processing of string
500            /* we do not have enough room so grow the buffer*/
501            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
502            if(b2==NULL){
503                *status = U_MEMORY_ALLOCATION_ERROR;
504                goto CLEANUP;
505            }
506
507            *status = U_ZERO_ERROR; // reset error
508
509            b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
510        }
511
512
513        //step 6:Apply toASCII
514        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
515
516        if(*status == U_BUFFER_OVERFLOW_ERROR){
517            // redo processing of string
518            /* we do not have enough room so grow the buffer*/
519            b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
520            if(b3==NULL){
521                *status = U_MEMORY_ALLOCATION_ERROR;
522                goto CLEANUP;
523            }
524
525            *status = U_ZERO_ERROR; // reset error
526
527            b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
528
529        }
530        //bail out on error
531        if(U_FAILURE(*status)){
532            goto CLEANUP;
533        }
534
535        //step 7: verify
536        if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
537            // Cause the original to be returned.
538            *status = U_IDNA_VERIFICATION_ERROR;
539            goto CLEANUP;
540        }
541
542        //step 8: return output of step 5
543        reqLength = b2Len;
544        if(b2Len <= destCapacity) {
545            uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
546        }
547    }
548    else{
549        // See the start of this if statement for why this is commented out.
550        // verify that STD3 ASCII rules are satisfied
551        /*if(useSTD3ASCIIRules == TRUE){
552            if( srcIsLDH == FALSE // source contains some non-LDH characters
553                || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
554                *status = U_IDNA_STD3_ASCII_RULES_ERROR;
555
556                // populate the parseError struct
557                if(srcIsLDH==FALSE){
558                    // failPos is always set the index of failure
559                    uprv_syntaxError(src,failPos, srcLength,parseError);
560                }else if(src[0] == HYPHEN){
561                    // fail position is 0
562                    uprv_syntaxError(src,0,srcLength,parseError);
563                }else{
564                    // the last index in the source is always length-1
565                    uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
566                }
567
568                goto CLEANUP;
569            }
570        }*/
571        // just return the source
572        //copy the source to destination
573        if(srcLength <= destCapacity){
574            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
575        }
576        reqLength = srcLength;
577    }
578
579
580CLEANUP:
581
582    if(b1 != b1Stack && b1!=src){
583        uprv_free(b1);
584    }
585    if(b2 != b2Stack){
586        uprv_free(b2);
587    }
588    uprv_free(caseFlags);
589
590    // The RFC states that
591    // <quote>
592    // ToUnicode never fails. If any step fails, then the original input
593    // is returned immediately in that step.
594    // </quote>
595    // So if any step fails lets copy source to destination
596    if(U_FAILURE(*status)){
597        //copy the source to destination
598        if(dest && srcLength <= destCapacity){
599            // srcLength should have already been set earlier.
600            U_ASSERT(srcLength >= 0);
601            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
602        }
603        reqLength = srcLength;
604        *status = U_ZERO_ERROR;
605    }
606
607    return u_terminateUChars(dest, destCapacity, reqLength, status);
608}
609
610U_CAPI int32_t U_EXPORT2
611uidna_toASCII(const UChar* src, int32_t srcLength,
612              UChar* dest, int32_t destCapacity,
613              int32_t options,
614              UParseError* parseError,
615              UErrorCode* status){
616
617    if(status == NULL || U_FAILURE(*status)){
618        return 0;
619    }
620    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
621        *status = U_ILLEGAL_ARGUMENT_ERROR;
622        return 0;
623    }
624
625    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
626
627    if(U_FAILURE(*status)){
628        return -1;
629    }
630
631    int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
632
633    /* close the profile*/
634    usprep_close(nameprep);
635
636    return retLen;
637}
638
639U_CAPI int32_t U_EXPORT2
640uidna_toUnicode(const UChar* src, int32_t srcLength,
641                UChar* dest, int32_t destCapacity,
642                int32_t options,
643                UParseError* parseError,
644                UErrorCode* status){
645
646    if(status == NULL || U_FAILURE(*status)){
647        return 0;
648    }
649    if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
650        *status = U_ILLEGAL_ARGUMENT_ERROR;
651        return 0;
652    }
653
654    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
655
656    if(U_FAILURE(*status)){
657        return -1;
658    }
659
660    int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
661
662    usprep_close(nameprep);
663
664    return retLen;
665}
666
667
668U_CAPI int32_t U_EXPORT2
669uidna_IDNToASCII(  const UChar *src, int32_t srcLength,
670                   UChar* dest, int32_t destCapacity,
671                   int32_t options,
672                   UParseError *parseError,
673                   UErrorCode *status){
674
675    if(status == NULL || U_FAILURE(*status)){
676        return 0;
677    }
678    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
679        *status = U_ILLEGAL_ARGUMENT_ERROR;
680        return 0;
681    }
682
683    int32_t reqLength = 0;
684
685    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
686
687    if(U_FAILURE(*status)){
688        return 0;
689    }
690
691    //initialize pointers
692    UChar *delimiter = (UChar*)src;
693    UChar *labelStart = (UChar*)src;
694    UChar *currentDest = (UChar*) dest;
695    int32_t remainingLen = srcLength;
696    int32_t remainingDestCapacity = destCapacity;
697    int32_t labelLen = 0, labelReqLength = 0;
698    UBool done = FALSE;
699
700
701    for(;;){
702
703        labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
704        labelReqLength = 0;
705        if(!(labelLen==0 && done)){// make sure this is not a root label separator.
706
707            labelReqLength = _internal_toASCII( labelStart, labelLen,
708                                                currentDest, remainingDestCapacity,
709                                                options, nameprep,
710                                                parseError, status);
711
712            if(*status == U_BUFFER_OVERFLOW_ERROR){
713
714                *status = U_ZERO_ERROR; // reset error
715                remainingDestCapacity = 0;
716            }
717        }
718
719
720        if(U_FAILURE(*status)){
721            break;
722        }
723
724        reqLength +=labelReqLength;
725        // adjust the destination pointer
726        if(labelReqLength < remainingDestCapacity){
727            currentDest = currentDest + labelReqLength;
728            remainingDestCapacity -= labelReqLength;
729        }else{
730            // should never occur
731            remainingDestCapacity = 0;
732        }
733
734        if(done == TRUE){
735            break;
736        }
737
738        // add the label separator
739        if(remainingDestCapacity > 0){
740            *currentDest++ = FULL_STOP;
741            remainingDestCapacity--;
742        }
743        reqLength++;
744
745        labelStart = delimiter;
746        if(remainingLen >0 ){
747            remainingLen = (int32_t)(srcLength - (delimiter - src));
748        }
749
750    }
751
752    if(reqLength > MAX_DOMAIN_NAME_LENGTH){
753        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
754    }
755
756    usprep_close(nameprep);
757
758    return u_terminateUChars(dest, destCapacity, reqLength, status);
759}
760
761U_CAPI int32_t U_EXPORT2
762uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
763                     UChar* dest, int32_t destCapacity,
764                     int32_t options,
765                     UParseError* parseError,
766                     UErrorCode* status){
767
768    if(status == NULL || U_FAILURE(*status)){
769        return 0;
770    }
771    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
772        *status = U_ILLEGAL_ARGUMENT_ERROR;
773        return 0;
774    }
775
776    int32_t reqLength = 0;
777
778    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
779
780    if(U_FAILURE(*status)){
781        return 0;
782    }
783
784    //initialize pointers
785    UChar *delimiter = (UChar*)src;
786    UChar *labelStart = (UChar*)src;
787    UChar *currentDest = (UChar*) dest;
788    int32_t remainingLen = srcLength;
789    int32_t remainingDestCapacity = destCapacity;
790    int32_t labelLen = 0, labelReqLength = 0;
791    UBool done = FALSE;
792
793    for(;;){
794
795        labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
796
797        // The RFC states that
798        // <quote>
799        // ToUnicode never fails. If any step fails, then the original input
800        // is returned immediately in that step.
801        // </quote>
802        // _internal_toUnicode will copy the label.
803        /*if(labelLen==0 && done==FALSE){
804            *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
805            break;
806        }*/
807
808        labelReqLength = _internal_toUnicode(labelStart, labelLen,
809                                             currentDest, remainingDestCapacity,
810                                             options, nameprep,
811                                             parseError, status);
812
813        if(*status == U_BUFFER_OVERFLOW_ERROR){
814            *status = U_ZERO_ERROR; // reset error
815            remainingDestCapacity = 0;
816        }
817
818        if(U_FAILURE(*status)){
819            break;
820        }
821
822        reqLength +=labelReqLength;
823        // adjust the destination pointer
824        if(labelReqLength < remainingDestCapacity){
825            currentDest = currentDest + labelReqLength;
826            remainingDestCapacity -= labelReqLength;
827        }else{
828            // should never occur
829            remainingDestCapacity = 0;
830        }
831
832        if(done == TRUE){
833            break;
834        }
835
836        // add the label separator
837        // Unlike the ToASCII operation we don't normalize the label separators
838        if(remainingDestCapacity > 0){
839            *currentDest++ = *(labelStart + labelLen);
840            remainingDestCapacity--;
841        }
842        reqLength++;
843
844        labelStart = delimiter;
845        if(remainingLen >0 ){
846            remainingLen = (int32_t)(srcLength - (delimiter - src));
847        }
848
849    }
850
851    if(reqLength > MAX_DOMAIN_NAME_LENGTH){
852        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
853    }
854
855    usprep_close(nameprep);
856
857    return u_terminateUChars(dest, destCapacity, reqLength, status);
858}
859
860U_CAPI int32_t U_EXPORT2
861uidna_compare(  const UChar *s1, int32_t length1,
862                const UChar *s2, int32_t length2,
863                int32_t options,
864                UErrorCode* status){
865
866    if(status == NULL || U_FAILURE(*status)){
867        return -1;
868    }
869
870    UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
871    UChar *b1 = b1Stack, *b2 = b2Stack;
872    int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
873    int32_t result=-1;
874
875    UParseError parseError;
876
877    b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
878    if(*status == U_BUFFER_OVERFLOW_ERROR){
879        // redo processing of string
880        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
881        if(b1==NULL){
882            *status = U_MEMORY_ALLOCATION_ERROR;
883            goto CLEANUP;
884        }
885
886        *status = U_ZERO_ERROR; // reset error
887
888        b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
889
890    }
891
892    b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
893    if(*status == U_BUFFER_OVERFLOW_ERROR){
894        // redo processing of string
895        b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
896        if(b2==NULL){
897            *status = U_MEMORY_ALLOCATION_ERROR;
898            goto CLEANUP;
899        }
900
901        *status = U_ZERO_ERROR; // reset error
902
903        b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
904
905    }
906    // when toASCII is applied all label separators are replaced with FULL_STOP
907    result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
908
909CLEANUP:
910    if(b1 != b1Stack){
911        uprv_free(b1);
912    }
913
914    if(b2 != b2Stack){
915        uprv_free(b2);
916    }
917
918    return result;
919}
920
921#endif /* #if !UCONFIG_NO_IDNA */
922