12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2003-2011, International Business Machines Corporation and    *
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.                                                *
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.stringprep;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.StringPrepParseException;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UCharacterIterator;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Reference implementation of the IDNA ToASCII and ToUnicode operations
 * for the StringPrep test package.
 *
 * @author ram
 */
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class IDNAReference {
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int ACE_PREFIX_LENGTH  = 4;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int MAX_LABEL_LENGTH   = 63;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int HYPHEN             = 0x002D;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CAPITAL_A          = 0x0041;
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CAPITAL_Z          = 0x005A;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int LOWER_CASE_DELTA   = 0x0020;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int FULL_STOP          = 0x002E;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int DEFAULT             = 0x0000;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int ALLOW_UNASSIGNED    = 0x0001;
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int USE_STD3_RULES      = 0x0002;
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final NamePrepTransform transform = NamePrepTransform.getInstance();
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static boolean isReady() {
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return transform.isReady();
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean startsWithPrefix(StringBuffer src){
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean startsWithPrefix = true;
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(src.length() < ACE_PREFIX_LENGTH){
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i=0; i<ACE_PREFIX_LENGTH;i++){
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                startsWithPrefix = false;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return startsWithPrefix;
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static char toASCIILower(char ch){
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (char)(ch + LOWER_CASE_DELTA);
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ch;
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static StringBuffer toASCIILower(StringBuffer src){
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer dest = new StringBuffer();
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i=0; i<src.length();i++){
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dest.append(toASCIILower(src.charAt(i)));
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c1,c2;
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int rc;
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i =0;/* no condition */;i++) {
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* If we reach the ends of both strings then they match */
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i == s1.length()) {
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 0;
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c1 = s1.charAt(i);
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c2 = s2.charAt(i);
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* Case-insensitive comparison */
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c1!=c2) {
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                rc=toASCIILower(c1)-toASCIILower(c2);
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(rc!=0) {
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return rc;
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int getSeparatorIndex(char[] src,int start, int limit){
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(; start<limit;start++){
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(NamePrepTransform.isLabelSeparator(src[start])){
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return start;
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // we have not found the separator just return length
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return start;
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isLDHChar(int ch){
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // high runner case
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(ch>0x007A){
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if( (ch==0x002D) ||
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (0x0030 <= ch && ch <= 0x0039) ||
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (0x0041 <= ch && ch <= 0x005A) ||
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (0x0061 <= ch && ch <= 0x007A)
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert          ){
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return false;
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertToASCII(String src, int options)
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator iter = UCharacterIterator.getInstance(src);
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return convertToASCII(iter,options);
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertToASCII(StringBuffer src, int options)
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator iter = UCharacterIterator.getInstance(src);
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return convertToASCII(iter,options);
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throws StringPrepParseException{
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char[] caseFlags = null;
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the source contains all ascii codepoints
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean srcIsASCII  = true;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // assume the source contains all LDH codepoints
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean srcIsLDH = true;
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //get the options
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ch;
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // step 1
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while((ch = srcIter.next())!= UCharacterIterator.DONE){
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(ch> 0x7f){
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                srcIsASCII = false;
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int failPos = -1;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        srcIter.setToStart();
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer processOut = null;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // step 2 is performed only if the source contains non ASCII
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!srcIsASCII){
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // step 2
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            processOut =  transform.prepare(srcIter,options);
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }else{
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            processOut = new StringBuffer(srcIter.getText());
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int poLen = processOut.length();
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(poLen==0){
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer dest = new StringBuffer();
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // reset the variable to verify if output of prepare is ASCII or not
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        srcIsASCII = true;
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // step 3 & 4
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int j=0;j<poLen;j++ ){
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ch=processOut.charAt(j);
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(ch > 0x7F){
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                srcIsASCII = false;
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }else if(isLDHChar(ch)==false){
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // here we do not assemble surrogates
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // since we know that LDH code points
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // are in the ASCII range only
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                srcIsLDH = false;
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                failPos = j;
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(useSTD3ASCIIRules == true){
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // verify 3a and 3b
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if( srcIsLDH == false /* source contains some non-LDH characters */
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || processOut.charAt(0) ==  HYPHEN
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                || processOut.charAt(processOut.length()-1) == HYPHEN){
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* populate the parseError struct */
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(srcIsLDH==false){
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              processOut.toString(),
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                             (failPos>0) ? (failPos-1) : failPos);
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }else if(processOut.charAt(0) == HYPHEN){
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }else{
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              StringPrepParseException.STD3_ASCII_RULES_ERROR,
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              processOut.toString(),
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              (poLen>0) ? poLen-1 : poLen);
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(srcIsASCII){
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dest =  processOut;
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }else{
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // step 5 : verify the sequence does not begin with ACE prefix
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!startsWithPrefix(processOut)){
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //step 6: encode the sequence with punycode
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags);
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // convert all codepoints to lower case ASCII
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuffer lowerOut = toASCIILower(punyout);
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //Step 7: prepend the ACE prefix
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH);
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //Step 6: copy the contents in b2 into dest
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.append(lowerOut);
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }else{
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new StringPrepParseException("The input does not start with the ACE Prefix.",
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                   StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(dest.length() > MAX_LABEL_LENGTH){
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options)
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throws StringPrepParseException{
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return convertIDNToASCII(iter.getText(), options);
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertIDNtoASCII(StringBuffer str,int options)
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throws StringPrepParseException{
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return convertIDNToASCII(str.toString(), options);
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertIDNToASCII(String src,int options)
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throws StringPrepParseException{
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char[] srcArr = src.toCharArray();
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer result = new StringBuffer();
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int sepIndex=0;
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int oldSepIndex = 0;
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;){
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //make sure this is not a root label separator.
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!(label.length()==0 && sepIndex==srcArr.length)){
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                UCharacterIterator iter = UCharacterIterator.getInstance(label);
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result.append(convertToASCII(iter,options));
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(sepIndex==srcArr.length){
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // increment the sepIndex to skip past the separator
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sepIndex++;
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            oldSepIndex = sepIndex;
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.append((char)FULL_STOP);
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertToUnicode(String src, int options)
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert           throws StringPrepParseException{
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator iter = UCharacterIterator.getInstance(src);
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return convertToUnicode(iter,options);
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertToUnicode(StringBuffer src, int options)
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert           throws StringPrepParseException{
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator iter = UCharacterIterator.getInstance(src);
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return convertToUnicode(iter,options);
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert           throws StringPrepParseException{
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the source contains all ascii codepoints
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean srcIsASCII = true;
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ch;
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int saveIndex = iter.getIndex();
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // step 1: find out if all the codepoints in src are ASCII
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while ((ch = iter.next()) != UCharacterIterator.DONE) {
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ch > 0x7F) {
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                srcIsASCII = false;
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The RFC states that
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // <quote>
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ToUnicode never fails. If any step fails, then the original input
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // is returned immediately in that step.
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // </quote>
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuffer processOut;
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (srcIsASCII == false) {
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // step 2: process the string
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                iter.setIndex(saveIndex);
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    processOut = transform.prepare(iter, options);
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch (StringPrepParseException e) {
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // just point to source
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                processOut = new StringBuffer(iter.getText());
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // step 3: verify ACE Prefix
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (startsWithPrefix(processOut)) {
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // step 4: Remove the ACE Prefix
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String temp = processOut.substring(ACE_PREFIX_LENGTH, processOut.length());
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // step 5: Decode using punycode
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuffer decodeOut = null;
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    decodeOut = PunycodeReference.decode(new StringBuffer(temp), null);
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch (StringPrepParseException e) {
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // step 6:Apply toASCII
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuffer toASCIIOut = convertToASCII(decodeOut, options);
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // step 7: verify
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0) {
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // step 8: return output of step 5
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return decodeOut;
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } while (false);
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return new StringBuffer(iter.getText());
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options)
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return convertIDNToUnicode(iter.getText(), options);
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertIDNToUnicode(StringBuffer str, int options)
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return convertIDNToUnicode(str.toString(), options);
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static StringBuffer convertIDNToUnicode(String src, int options)
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char[] srcArr = src.toCharArray();
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer result = new StringBuffer();
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int sepIndex=0;
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int oldSepIndex=0;
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;){
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(label.length()==0 && sepIndex!=srcArr.length ){
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UCharacterIterator iter = UCharacterIterator.getInstance(label);
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.append(convertToUnicode(iter,options));
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(sepIndex==srcArr.length){
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // increment the sepIndex to skip past the separator
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sepIndex++;
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            oldSepIndex = sepIndex;
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.append((char)FULL_STOP);
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  TODO: optimize
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(StringBuffer s1, StringBuffer s2, int options)
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(s1==null || s2 == null){
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("One of the source buffers is null");
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer s1Out = convertIDNToASCII(s1.toString(), options);
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer s2Out = convertIDNToASCII(s2.toString(), options);
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return compareCaseInsensitiveASCII(s1Out,s2Out);
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  TODO: optimize
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(String s1, String s2, int options)
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(s1==null || s2 == null){
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("One of the source buffers is null");
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer s1Out = convertIDNToASCII(s1, options);
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer s2Out = convertIDNToASCII(s2, options);
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return compareCaseInsensitiveASCII(s1Out,s2Out);
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  TODO: optimize
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options)
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        throws StringPrepParseException{
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(i1==null || i2 == null){
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("One of the source buffers is null");
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer s1Out = convertIDNToASCII(i1.getText(), options);
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer s2Out = convertIDNToASCII(i2.getText(), options);
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return compareCaseInsensitiveASCII(s1Out,s2Out);
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
408