12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2003-2011, International Business Machines Corporation and * 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. * 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.stringprep; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.StringPrepParseException; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UCharacterIterator; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author ram 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * To change the template for this generated type comment go to 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Window>Preferences>Java>Code Generation>Code and Comments 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class IDNAReference { 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int ACE_PREFIX_LENGTH = 4; 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int MAX_LABEL_LENGTH = 63; 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int HYPHEN = 0x002D; 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int CAPITAL_A = 0x0041; 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int CAPITAL_Z = 0x005A; 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int LOWER_CASE_DELTA = 0x0020; 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int FULL_STOP = 0x002E; 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final int DEFAULT = 0x0000; 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final int ALLOW_UNASSIGNED = 0x0001; 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final int USE_STD3_RULES = 0x0002; 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final NamePrepTransform transform = NamePrepTransform.getInstance(); 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static boolean isReady() { 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return transform.isReady(); 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static boolean startsWithPrefix(StringBuffer src){ 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean startsWithPrefix = true; 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(src.length() < ACE_PREFIX_LENGTH){ 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int i=0; i<ACE_PREFIX_LENGTH;i++){ 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){ 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert startsWithPrefix = false; 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return startsWithPrefix; 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static char toASCIILower(char ch){ 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (char)(ch + LOWER_CASE_DELTA); 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ch; 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static StringBuffer toASCIILower(StringBuffer src){ 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer dest = new StringBuffer(); 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int i=0; i<src.length();i++){ 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dest.append(toASCIILower(src.charAt(i))); 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return dest; 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){ 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c1,c2; 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rc; 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int i =0;/* no condition */;i++) { 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* If we reach the ends of both strings then they match */ 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i == s1.length()) { 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert c1 = s1.charAt(i); 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert c2 = s2.charAt(i); 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Case-insensitive comparison */ 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c1!=c2) { 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rc=toASCIILower(c1)-toASCIILower(c2); 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(rc!=0) { 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return rc; 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int getSeparatorIndex(char[] src,int start, int limit){ 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(; start<limit;start++){ 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(NamePrepTransform.isLabelSeparator(src[start])){ 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return start; 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // we have not found the separator just return length 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return start; 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static boolean isLDHChar(int ch){ 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // high runner case 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(ch>0x007A){ 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if( (ch==0x002D) || 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (0x0030 <= ch && ch <= 0x0039) || 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (0x0041 <= ch && ch <= 0x005A) || 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (0x0061 <= ch && ch <= 0x007A) 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ){ 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return true; 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertToASCII(String src, int options) 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacterIterator iter = UCharacterIterator.getInstance(src); 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertToASCII(iter,options); 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertToASCII(StringBuffer src, int options) 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacterIterator iter = UCharacterIterator.getInstance(src); 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertToASCII(iter,options); 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options) 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char[] caseFlags = null; 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the source contains all ascii codepoints 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean srcIsASCII = true; 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // assume the source contains all LDH codepoints 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean srcIsLDH = true; 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //get the options 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0); 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int ch; 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 1 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while((ch = srcIter.next())!= UCharacterIterator.DONE){ 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(ch> 0x7f){ 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert srcIsASCII = false; 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int failPos = -1; 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert srcIter.setToStart(); 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer processOut = null; 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 2 is performed only if the source contains non ASCII 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!srcIsASCII){ 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 2 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert processOut = transform.prepare(srcIter,options); 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }else{ 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert processOut = new StringBuffer(srcIter.getText()); 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int poLen = processOut.length(); 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(poLen==0){ 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer dest = new StringBuffer(); 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // reset the variable to verify if output of prepare is ASCII or not 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert srcIsASCII = true; 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 3 & 4 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int j=0;j<poLen;j++ ){ 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ch=processOut.charAt(j); 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(ch > 0x7F){ 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert srcIsASCII = false; 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }else if(isLDHChar(ch)==false){ 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // here we do not assemble surrogates 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // since we know that LDH code points 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // are in the ASCII range only 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert srcIsLDH = false; 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert failPos = j; 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(useSTD3ASCIIRules == true){ 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // verify 3a and 3b 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if( srcIsLDH == false /* source contains some non-LDH characters */ 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert || processOut.charAt(0) == HYPHEN 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert || processOut.charAt(processOut.length()-1) == HYPHEN){ 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* populate the parseError struct */ 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(srcIsLDH==false){ 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules", 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringPrepParseException.STD3_ASCII_RULES_ERROR, 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert processOut.toString(), 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (failPos>0) ? (failPos-1) : failPos); 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }else if(processOut.charAt(0) == HYPHEN){ 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0); 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }else{ 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringPrepParseException.STD3_ASCII_RULES_ERROR, 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert processOut.toString(), 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (poLen>0) ? poLen-1 : poLen); 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(srcIsASCII){ 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dest = processOut; 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }else{ 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 5 : verify the sequence does not begin with ACE prefix 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!startsWithPrefix(processOut)){ 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //step 6: encode the sequence with punycode 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer punyout = PunycodeReference.encode(processOut,caseFlags); 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // convert all codepoints to lower case ASCII 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer lowerOut = toASCIILower(punyout); 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //Step 7: prepend the ACE prefix 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dest.append(ACE_PREFIX,0,ACE_PREFIX_LENGTH); 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //Step 6: copy the contents in b2 into dest 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dest.append(lowerOut); 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }else{ 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new StringPrepParseException("The input does not start with the ACE Prefix.", 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0); 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(dest.length() > MAX_LABEL_LENGTH){ 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new StringPrepParseException("The labels in the input are too long. Length > 64.", 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0); 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return dest; 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertIDNtoASCII(UCharacterIterator iter,int options) 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertIDNToASCII(iter.getText(), options); 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertIDNtoASCII(StringBuffer str,int options) 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertIDNToASCII(str.toString(), options); 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertIDNToASCII(String src,int options) 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char[] srcArr = src.toCharArray(); 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer result = new StringBuffer(); 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int sepIndex=0; 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int oldSepIndex = 0; 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;){ 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //make sure this is not a root label separator. 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!(label.length()==0 && sepIndex==srcArr.length)){ 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacterIterator iter = UCharacterIterator.getInstance(label); 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(convertToASCII(iter,options)); 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(sepIndex==srcArr.length){ 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // increment the sepIndex to skip past the separator 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sepIndex++; 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oldSepIndex = sepIndex; 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append((char)FULL_STOP); 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertToUnicode(String src, int options) 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacterIterator iter = UCharacterIterator.getInstance(src); 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertToUnicode(iter,options); 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertToUnicode(StringBuffer src, int options) 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacterIterator iter = UCharacterIterator.getInstance(src); 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertToUnicode(iter,options); 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertToUnicode(UCharacterIterator iter, int options) 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the source contains all ascii codepoints 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean srcIsASCII = true; 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int ch; 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int saveIndex = iter.getIndex(); 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 1: find out if all the codepoints in src are ASCII 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while ((ch = iter.next()) != UCharacterIterator.DONE) { 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (ch > 0x7F) { 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert srcIsASCII = false; 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The RFC states that 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // <quote> 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // ToUnicode never fails. If any step fails, then the original input 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // is returned immediately in that step. 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // </quote> 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer processOut; 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (srcIsASCII == false) { 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 2: process the string 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setIndex(saveIndex); 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert processOut = transform.prepare(iter, options); 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (StringPrepParseException e) { 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // just point to source 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert processOut = new StringBuffer(iter.getText()); 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 3: verify ACE Prefix 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (startsWithPrefix(processOut)) { 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 4: Remove the ACE Prefix 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String temp = processOut.substring(ACE_PREFIX_LENGTH, processOut.length()); 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 5: Decode using punycode 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer decodeOut = null; 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert decodeOut = PunycodeReference.decode(new StringBuffer(temp), null); 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (StringPrepParseException e) { 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 6:Apply toASCII 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer toASCIIOut = convertToASCII(decodeOut, options); 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 7: verify 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (compareCaseInsensitiveASCII(processOut, toASCIIOut) != 0) { 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // step 8: return output of step 5 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return decodeOut; 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while (false); 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new StringBuffer(iter.getText()); 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertIDNToUnicode(UCharacterIterator iter, int options) 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertIDNToUnicode(iter.getText(), options); 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertIDNToUnicode(StringBuffer str, int options) 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return convertIDNToUnicode(str.toString(), options); 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static StringBuffer convertIDNToUnicode(String src, int options) 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char[] srcArr = src.toCharArray(); 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer result = new StringBuffer(); 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int sepIndex=0; 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int oldSepIndex=0; 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;){ 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(label.length()==0 && sepIndex!=srcArr.length ){ 3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); 3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacterIterator iter = UCharacterIterator.getInstance(label); 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(convertToUnicode(iter,options)); 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(sepIndex==srcArr.length){ 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // increment the sepIndex to skip past the separator 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sepIndex++; 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oldSepIndex = sepIndex; 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append((char)FULL_STOP); 3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: optimize 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int compare(StringBuffer s1, StringBuffer s2, int options) 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(s1==null || s2 == null){ 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("One of the source buffers is null"); 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer s1Out = convertIDNToASCII(s1.toString(), options); 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer s2Out = convertIDNToASCII(s2.toString(), options); 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return compareCaseInsensitiveASCII(s1Out,s2Out); 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: optimize 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int compare(String s1, String s2, int options) 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(s1==null || s2 == null){ 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("One of the source buffers is null"); 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer s1Out = convertIDNToASCII(s1, options); 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer s2Out = convertIDNToASCII(s2, options); 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return compareCaseInsensitiveASCII(s1Out,s2Out); 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: optimize 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int compare(UCharacterIterator i1, UCharacterIterator i2, int options) 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throws StringPrepParseException{ 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(i1==null || i2 == null){ 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("One of the source buffers is null"); 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer s1Out = convertIDNToASCII(i1.getText(), options); 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer s2Out = convertIDNToASCII(i2.getText(), options); 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return compareCaseInsensitiveASCII(s1Out,s2Out); 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 408