1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3******************************************************************************* 4* Copyright (C) 2003-2010, International Business Machines 5* Corporation and others. All Rights Reserved. 6******************************************************************************* 7*/ 8package android.icu.impl; 9 10import android.icu.text.IDNA; 11import android.icu.text.StringPrep; 12import android.icu.text.StringPrepParseException; 13import android.icu.text.UCharacterIterator; 14 15/** 16 * IDNA2003 implementation code, moved out of android.icu.text.IDNA.java 17 * while extending that class to support IDNA2008/UTS #46 as well. 18 * @author Ram Viswanadha 19 * @hide Only a subset of ICU is exposed in Android 20 */ 21public final class IDNA2003 { 22 /* IDNA ACE Prefix is "xn--" */ 23 private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ; 24 //private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length; 25 26 private static final int MAX_LABEL_LENGTH = 63; 27 private static final int HYPHEN = 0x002D; 28 private static final int CAPITAL_A = 0x0041; 29 private static final int CAPITAL_Z = 0x005A; 30 private static final int LOWER_CASE_DELTA = 0x0020; 31 private static final int FULL_STOP = 0x002E; 32 private static final int MAX_DOMAIN_NAME_LENGTH = 255; 33 34 // The NamePrep profile object 35 private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP); 36 37 private static boolean startsWithPrefix(StringBuffer src){ 38 boolean startsWithPrefix = true; 39 40 if(src.length() < ACE_PREFIX.length){ 41 return false; 42 } 43 for(int i=0; i<ACE_PREFIX.length;i++){ 44 if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){ 45 startsWithPrefix = false; 46 } 47 } 48 return startsWithPrefix; 49 } 50 51 private static char toASCIILower(char ch){ 52 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 53 return (char)(ch + LOWER_CASE_DELTA); 54 } 55 return ch; 56 } 57 58 private static StringBuffer toASCIILower(CharSequence src){ 59 StringBuffer dest = new StringBuffer(); 60 for(int i=0; i<src.length();i++){ 61 dest.append(toASCIILower(src.charAt(i))); 62 } 63 return dest; 64 } 65 66 private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){ 67 char c1,c2; 68 int rc; 69 for(int i =0;/* no condition */;i++) { 70 /* If we reach the ends of both strings then they match */ 71 if(i == s1.length()) { 72 return 0; 73 } 74 75 c1 = s1.charAt(i); 76 c2 = s2.charAt(i); 77 78 /* Case-insensitive comparison */ 79 if(c1!=c2) { 80 rc=toASCIILower(c1)-toASCIILower(c2); 81 if(rc!=0) { 82 return rc; 83 } 84 } 85 } 86 } 87 88 private static int getSeparatorIndex(char[] src,int start, int limit){ 89 for(; start<limit;start++){ 90 if(isLabelSeparator(src[start])){ 91 return start; 92 } 93 } 94 // we have not found the separator just return length 95 return start; 96 } 97 98 /* 99 private static int getSeparatorIndex(UCharacterIterator iter){ 100 int currentIndex = iter.getIndex(); 101 int separatorIndex = 0; 102 int ch; 103 while((ch=iter.next())!= UCharacterIterator.DONE){ 104 if(isLabelSeparator(ch)){ 105 separatorIndex = iter.getIndex(); 106 iter.setIndex(currentIndex); 107 return separatorIndex; 108 } 109 } 110 // reset index 111 iter.setIndex(currentIndex); 112 // we have not found the separator just return the length 113 114 } 115 */ 116 117 118 private static boolean isLDHChar(int ch){ 119 // high runner case 120 if(ch>0x007A){ 121 return false; 122 } 123 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 124 if( (ch==0x002D) || 125 (0x0030 <= ch && ch <= 0x0039) || 126 (0x0041 <= ch && ch <= 0x005A) || 127 (0x0061 <= ch && ch <= 0x007A) 128 ){ 129 return true; 130 } 131 return false; 132 } 133 134 /** 135 * Ascertain if the given code point is a label separator as 136 * defined by the IDNA RFC 137 * 138 * @param ch The code point to be ascertained 139 * @return true if the char is a label separator 140 */ 141 private static boolean isLabelSeparator(int ch){ 142 switch(ch){ 143 case 0x002e: 144 case 0x3002: 145 case 0xFF0E: 146 case 0xFF61: 147 return true; 148 default: 149 return false; 150 } 151 } 152 153 public static StringBuffer convertToASCII(UCharacterIterator src, int options) 154 throws StringPrepParseException{ 155 156 boolean[] caseFlags = null; 157 158 // the source contains all ascii codepoints 159 boolean srcIsASCII = true; 160 // assume the source contains all LDH codepoints 161 boolean srcIsLDH = true; 162 163 //get the options 164 boolean useSTD3ASCIIRules = ((options & IDNA.USE_STD3_RULES) != 0); 165 int ch; 166 // step 1 167 while((ch = src.next())!= UCharacterIterator.DONE){ 168 if(ch> 0x7f){ 169 srcIsASCII = false; 170 } 171 } 172 int failPos = -1; 173 src.setToStart(); 174 StringBuffer processOut = null; 175 // step 2 is performed only if the source contains non ASCII 176 if(!srcIsASCII){ 177 // step 2 178 processOut = namePrep.prepare(src, options); 179 }else{ 180 processOut = new StringBuffer(src.getText()); 181 } 182 int poLen = processOut.length(); 183 184 if(poLen==0){ 185 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); 186 } 187 StringBuffer dest = new StringBuffer(); 188 189 // reset the variable to verify if output of prepare is ASCII or not 190 srcIsASCII = true; 191 192 // step 3 & 4 193 for(int j=0;j<poLen;j++ ){ 194 ch=processOut.charAt(j); 195 if(ch > 0x7F){ 196 srcIsASCII = false; 197 }else if(isLDHChar(ch)==false){ 198 // here we do not assemble surrogates 199 // since we know that LDH code points 200 // are in the ASCII range only 201 srcIsLDH = false; 202 failPos = j; 203 } 204 } 205 206 if(useSTD3ASCIIRules == true){ 207 // verify 3a and 3b 208 if( srcIsLDH == false /* source contains some non-LDH characters */ 209 || processOut.charAt(0) == HYPHEN 210 || processOut.charAt(processOut.length()-1) == HYPHEN){ 211 212 /* populate the parseError struct */ 213 if(srcIsLDH==false){ 214 throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules", 215 StringPrepParseException.STD3_ASCII_RULES_ERROR, 216 processOut.toString(), 217 (failPos>0) ? (failPos-1) : failPos); 218 }else if(processOut.charAt(0) == HYPHEN){ 219 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 220 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0); 221 222 }else{ 223 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 224 StringPrepParseException.STD3_ASCII_RULES_ERROR, 225 processOut.toString(), 226 (poLen>0) ? poLen-1 : poLen); 227 228 } 229 } 230 } 231 if(srcIsASCII){ 232 dest = processOut; 233 }else{ 234 // step 5 : verify the sequence does not begin with ACE prefix 235 if(!startsWithPrefix(processOut)){ 236 237 //step 6: encode the sequence with punycode 238 caseFlags = new boolean[poLen]; 239 240 StringBuilder punyout = Punycode.encode(processOut,caseFlags); 241 242 // convert all codepoints to lower case ASCII 243 StringBuffer lowerOut = toASCIILower(punyout); 244 245 //Step 7: prepend the ACE prefix 246 dest.append(ACE_PREFIX,0,ACE_PREFIX.length); 247 //Step 6: copy the contents in b2 into dest 248 dest.append(lowerOut); 249 }else{ 250 251 throw new StringPrepParseException("The input does not start with the ACE Prefix.", 252 StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0); 253 } 254 } 255 if(dest.length() > MAX_LABEL_LENGTH){ 256 throw new StringPrepParseException("The labels in the input are too long. Length > 63.", 257 StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0); 258 } 259 return dest; 260 } 261 262 public static StringBuffer convertIDNToASCII(String src,int options) 263 throws StringPrepParseException{ 264 265 char[] srcArr = src.toCharArray(); 266 StringBuffer result = new StringBuffer(); 267 int sepIndex=0; 268 int oldSepIndex=0; 269 for(;;){ 270 sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); 271 String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); 272 //make sure this is not a root label separator. 273 if(!(label.length()==0 && sepIndex==srcArr.length)){ 274 UCharacterIterator iter = UCharacterIterator.getInstance(label); 275 result.append(convertToASCII(iter,options)); 276 } 277 if(sepIndex==srcArr.length){ 278 break; 279 } 280 281 // increment the sepIndex to skip past the separator 282 sepIndex++; 283 oldSepIndex = sepIndex; 284 result.append((char)FULL_STOP); 285 } 286 if(result.length() > MAX_DOMAIN_NAME_LENGTH){ 287 throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR); 288 } 289 return result; 290 } 291 292 public static StringBuffer convertToUnicode(UCharacterIterator src, int options) 293 throws StringPrepParseException{ 294 295 boolean[] caseFlags = null; 296 297 // the source contains all ascii codepoints 298 boolean srcIsASCII = true; 299 // assume the source contains all LDH codepoints 300 //boolean srcIsLDH = true; 301 302 //get the options 303 //boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0); 304 305 //int failPos = -1; 306 int ch; 307 int saveIndex = src.getIndex(); 308 // step 1: find out if all the codepoints in src are ASCII 309 while((ch=src.next())!= UCharacterIterator.DONE){ 310 if(ch>0x7F){ 311 srcIsASCII = false; 312 }/*else if((srcIsLDH = isLDHChar(ch))==false){ 313 failPos = src.getIndex(); 314 }*/ 315 } 316 StringBuffer processOut; 317 318 if(srcIsASCII == false){ 319 try { 320 // step 2: process the string 321 src.setIndex(saveIndex); 322 processOut = namePrep.prepare(src,options); 323 } catch (StringPrepParseException ex) { 324 return new StringBuffer(src.getText()); 325 } 326 327 }else{ 328 //just point to source 329 processOut = new StringBuffer(src.getText()); 330 } 331 // TODO: 332 // The RFC states that 333 // <quote> 334 // ToUnicode never fails. If any step fails, then the original input 335 // is returned immediately in that step. 336 // </quote> 337 338 //step 3: verify ACE Prefix 339 if(startsWithPrefix(processOut)){ 340 StringBuffer decodeOut = null; 341 342 //step 4: Remove the ACE Prefix 343 String temp = processOut.substring(ACE_PREFIX.length,processOut.length()); 344 345 //step 5: Decode using punycode 346 try { 347 decodeOut = new StringBuffer(Punycode.decode(temp,caseFlags)); 348 } catch (StringPrepParseException e) { 349 decodeOut = null; 350 } 351 352 //step 6:Apply toASCII 353 if (decodeOut != null) { 354 StringBuffer toASCIIOut = convertToASCII(UCharacterIterator.getInstance(decodeOut), options); 355 356 //step 7: verify 357 if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){ 358// throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed", 359// StringPrepParseException.VERIFICATION_ERROR); 360 decodeOut = null; 361 } 362 } 363 364 //step 8: return output of step 5 365 if (decodeOut != null) { 366 return decodeOut; 367 } 368 } 369 370// }else{ 371// // verify that STD3 ASCII rules are satisfied 372// if(useSTD3ASCIIRules == true){ 373// if( srcIsLDH == false /* source contains some non-LDH characters */ 374// || processOut.charAt(0) == HYPHEN 375// || processOut.charAt(processOut.length()-1) == HYPHEN){ 376// 377// if(srcIsLDH==false){ 378// throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 379// StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(), 380// (failPos>0) ? (failPos-1) : failPos); 381// }else if(processOut.charAt(0) == HYPHEN){ 382// throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 383// StringPrepParseException.STD3_ASCII_RULES_ERROR, 384// processOut.toString(),0); 385// 386// }else{ 387// throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 388// StringPrepParseException.STD3_ASCII_RULES_ERROR, 389// processOut.toString(), 390// processOut.length()); 391// 392// } 393// } 394// } 395// // just return the source 396// return new StringBuffer(src.getText()); 397// } 398 399 return new StringBuffer(src.getText()); 400 } 401 402 public static StringBuffer convertIDNToUnicode(String src, int options) 403 throws StringPrepParseException{ 404 405 char[] srcArr = src.toCharArray(); 406 StringBuffer result = new StringBuffer(); 407 int sepIndex=0; 408 int oldSepIndex=0; 409 for(;;){ 410 sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); 411 String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); 412 if(label.length()==0 && sepIndex!=srcArr.length ){ 413 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); 414 } 415 UCharacterIterator iter = UCharacterIterator.getInstance(label); 416 result.append(convertToUnicode(iter,options)); 417 if(sepIndex==srcArr.length){ 418 break; 419 } 420 // Unlike the ToASCII operation we don't normalize the label separators 421 result.append(srcArr[sepIndex]); 422 // increment the sepIndex to skip past the separator 423 sepIndex++; 424 oldSepIndex =sepIndex; 425 } 426 if(result.length() > MAX_DOMAIN_NAME_LENGTH){ 427 throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR); 428 } 429 return result; 430 } 431 432 public static int compare(String s1, String s2, int options) throws StringPrepParseException{ 433 StringBuffer s1Out = convertIDNToASCII(s1, options); 434 StringBuffer s2Out = convertIDNToASCII(s2, options); 435 return compareCaseInsensitiveASCII(s1Out,s2Out); 436 } 437} 438