/*
 *******************************************************************************
 * Copyright (C) 2010-2014, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.dev.test.normalizer;

import java.util.Collections;
import java.util.EnumSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
import com.ibm.icu.text.IDNA;

/**
 * UTS #46 (IDNA2008) test.
 *
 * <p>Exercises two {@link IDNA} instances that share the same option set
 * (STD3 rules, BiDi, CONTEXTJ and CONTEXTO checks): a transitional one and a
 * nontransitional one. Most of the coverage comes from the data table
 * {@link #testCases}, which is driven by {@link #TestSomeCases()}.
 *
 * @author Markus Scherer
 * @since 2010jul10
 */
public class UTS46Test extends TestFmwk {
    public static void main(String[] args) throws Exception {
        new UTS46Test().run(args);
    }

    /**
     * Creates the transitional and nontransitional IDNA instances used by all tests.
     */
    public UTS46Test() {
        int commonOptions=
            IDNA.USE_STD3_RULES|IDNA.CHECK_BIDI|
            IDNA.CHECK_CONTEXTJ|IDNA.CHECK_CONTEXTO;
        trans=IDNA.getUTS46Instance(commonOptions);
        nontrans=IDNA.getUTS46Instance(commonOptions|
                                       IDNA.NONTRANSITIONAL_TO_ASCII|IDNA.NONTRANSITIONAL_TO_UNICODE);
    }

    /**
     * Basic API smoke test: a plain ASCII name, a label containing a dot,
     * the returned-StringBuilder convenience, and rejection of input==output.
     */
    public void TestAPI() {
        StringBuilder result=new StringBuilder();
        IDNA.Info info=new IDNA.Info();
        String input="www.eXample.cOm";
        String expected="www.example.com";
        trans.nameToASCII(input, result, info);
        if(info.hasErrors() || !UTF16Plus.equal(result, expected)) {
            errln(String.format("T.nameToASCII(www.example.com) info.errors=%s result matches=%b",
                                info.getErrors(), UTF16Plus.equal(result, expected)));
        }
        // A dot inside a single label must be reported and replaced with U+FFFD.
        input="xn--bcher.de-65a";
        expected="xn--bcher\uFFFDde-65a";
        nontrans.labelToASCII(input, result, info);
        if( !info.getErrors().equals(EnumSet.of(IDNA.Error.LABEL_HAS_DOT, IDNA.Error.INVALID_ACE_LABEL)) ||
            !UTF16Plus.equal(result, expected)
        ) {
            errln(String.format("N.labelToASCII(label-with-dot) failed with errors %s",
                                info.getErrors()));
        }
        // Java API tests that are not parallel to C++ tests
        // because the C++ specifics (error codes etc.) do not apply here.
        String resultString=trans.nameToUnicode("fA\u00DF.de", result, info).toString();
        if(info.hasErrors() || !resultString.equals("fass.de")) {
            errln(String.format("T.nameToUnicode(fA\u00DF.de) info.errors=%s result matches=%b",
                                info.getErrors(), resultString.equals("fass.de")));
        }
        try {
            // Using the same object as both source and destination is expected to be rejected.
            nontrans.labelToUnicode(result, result, info);
            errln("N.labelToUnicode(result, result) did not throw an Exception");
        } catch(Exception e) {
            // as expected (should be an IllegalArgumentException, or an ICU version of it)
        }
    }

    /**
     * Tests an instance created without USE_STD3_RULES: non-LDH ASCII must pass
     * through, while the BiDi check still applies.
     */
    public void TestNotSTD3() {
        IDNA not3=IDNA.getUTS46Instance(IDNA.CHECK_BIDI);
        String input="\u0000A_2+2=4\n.e\u00DFen.net";
        StringBuilder result=new StringBuilder();
        IDNA.Info info=new IDNA.Info();
        if( !not3.nameToUnicode(input, result, info).toString().equals("\u0000a_2+2=4\n.essen.net") ||
            info.hasErrors()
        ) {
            errln(String.format("notSTD3.nameToUnicode(non-LDH ASCII) unexpected errors %s string %s",
                                info.getErrors(), prettify(result.toString())));
        }
        // A space (BiDi class WS) is not allowed in a BiDi domain name.
        input="a z.xn--4db.edu";
        not3.nameToASCII(input, result, info);
        if(!UTF16Plus.equal(result, input) || !info.getErrors().equals(EnumSet.of(IDNA.Error.BIDI))) {
            errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed");
        }
        // Characters that are canonically equivalent to sequences with non-LDH ASCII.
        input="a\u2260b\u226Ec\u226Fd";
        not3.nameToUnicode(input, result, info);
        if(!UTF16Plus.equal(result, input) || info.hasErrors()) {
            errln(String.format("notSTD3.nameToUnicode(equiv to non-LDH ASCII) unexpected errors %s string %s",
                                info.getErrors().toString(), prettify(result.toString())));
        }
    }

    /**
     * Maps the C-style error names used in the {@link #testCases} table
     * to the corresponding {@link IDNA.Error} constants.
     */
    private static final Map<String, IDNA.Error> errorNamesToErrors;
    static {
        errorNamesToErrors=new TreeMap<String, IDNA.Error>();
        errorNamesToErrors.put("UIDNA_ERROR_EMPTY_LABEL", IDNA.Error.EMPTY_LABEL);
        errorNamesToErrors.put("UIDNA_ERROR_LABEL_TOO_LONG", IDNA.Error.LABEL_TOO_LONG);
        errorNamesToErrors.put("UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", IDNA.Error.DOMAIN_NAME_TOO_LONG);
        errorNamesToErrors.put("UIDNA_ERROR_LEADING_HYPHEN", IDNA.Error.LEADING_HYPHEN);
        errorNamesToErrors.put("UIDNA_ERROR_TRAILING_HYPHEN", IDNA.Error.TRAILING_HYPHEN);
        errorNamesToErrors.put("UIDNA_ERROR_HYPHEN_3_4", IDNA.Error.HYPHEN_3_4);
        errorNamesToErrors.put("UIDNA_ERROR_LEADING_COMBINING_MARK", IDNA.Error.LEADING_COMBINING_MARK);
        errorNamesToErrors.put("UIDNA_ERROR_DISALLOWED", IDNA.Error.DISALLOWED);
        errorNamesToErrors.put("UIDNA_ERROR_PUNYCODE", IDNA.Error.PUNYCODE);
        errorNamesToErrors.put("UIDNA_ERROR_LABEL_HAS_DOT", IDNA.Error.LABEL_HAS_DOT);
        errorNamesToErrors.put("UIDNA_ERROR_INVALID_ACE_LABEL", IDNA.Error.INVALID_ACE_LABEL);
        errorNamesToErrors.put("UIDNA_ERROR_BIDI", IDNA.Error.BIDI);
        errorNamesToErrors.put("UIDNA_ERROR_CONTEXTJ", IDNA.Error.CONTEXTJ);
        errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_PUNCTUATION", IDNA.Error.CONTEXTO_PUNCTUATION);
        errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_DIGITS", IDNA.Error.CONTEXTO_DIGITS);
    }

    /**
     * One parsed row of the {@link #testCases} table. A single instance is
     * reused for all rows; {@link #set(String[])} overwrites every field.
     */
    private static final class TestCase {
        private TestCase() {
            errors=EnumSet.noneOf(IDNA.Error.class);
        }
        /**
         * Loads one table row.
         *
         * @param data four strings: input, mode, expected Unicode result,
         *             and a '|'-separated list of error names (may be empty)
         * @throws IllegalArgumentException if an error name is not in errorNamesToErrors
         */
        private void set(String[] data) {
            s=data[0];
            o=data[1];
            u=data[2];
            errors.clear();
            if(data[3].length()!=0) {
                for(String e: data[3].split("\\|")) {
                    IDNA.Error error=errorNamesToErrors.get(e);
                    if(error==null) {
                        // Fail with a readable message rather than a NullPointerException
                        // from EnumSet.add(null).
                        throw new IllegalArgumentException("unknown IDNA error name: "+e);
                    }
                    errors.add(error);
                }
            }
        }
        // Input string and options string (Nontransitional/Transitional/Both).
        private String s, o;
        // Expected Unicode result string.
        private String u;
        private EnumSet<IDNA.Error> errors;
    }

    /**
     * Test data. Each row is { input, mode, expected Unicode result, expected errors }
     * where mode is "N" (nontransitional), "T" (transitional) or "B" (both),
     * and errors is a '|'-separated list of names from errorNamesToErrors.
     */
    private static final String[][] testCases={
        { "www.eXample.cOm", "B",  // all ASCII
          "www.example.com", "" },
        { "B\u00FCcher.de", "B",  // u-umlaut
          "b\u00FCcher.de", "" },
        { "\u00D6BB", "B",  // O-umlaut
          "\u00F6bb", "" },
        { "fa\u00DF.de", "N",  // sharp s
          "fa\u00DF.de", "" },
        { "fa\u00DF.de", "T",  // sharp s
          "fass.de", "" },
        { "XN--fA-hia.dE", "B",  // sharp s in Punycode
          "fa\u00DF.de", "" },
        { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "N",  // Greek with final sigma
          "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "" },
        { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "T",  // Greek with final sigma
          "\u03B2\u03CC\u03BB\u03BF\u03C3.com", "" },
        { "xn--nxasmm1c", "B",  // Greek with final sigma in Punycode
          "\u03B2\u03CC\u03BB\u03BF\u03C2", "" },
        { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "N",  // "Sri" in "Sri Lanka" has a ZWJ
          "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
        { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "T",  // "Sri" in "Sri Lanka" has a ZWJ
          "www.\u0DC1\u0DCA\u0DBB\u0DD3.com", "" },
        { "www.xn--10cl1a0b660p.com", "B",  // "Sri" in Punycode
          "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
        { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "N",  // ZWNJ
          "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "" },
        { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "T",  // ZWNJ
          "\u0646\u0627\u0645\u0647\u0627\u06CC", "" },
        { "xn--mgba3gch31f060k.com", "B",  // ZWNJ in Punycode
          "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC.com", "" },
        { "a.b\uFF0Ec\u3002d\uFF61", "B",
          "a.b.c.d.", "" },
        { "U\u0308.xn--tda", "B",  // U+umlaut.u-umlaut
          "\u00FC.\u00FC", "" },
        { "xn--u-ccb", "B",  // u+umlaut in Punycode
          "xn--u-ccb\uFFFD", "UIDNA_ERROR_INVALID_ACE_LABEL" },
        { "a\u2488com", "B",  // contains 1-dot
          "a\uFFFDcom", "UIDNA_ERROR_DISALLOWED" },
        { "xn--a-ecp.ru", "B",  // contains 1-dot in Punycode
          "xn--a-ecp\uFFFD.ru", "UIDNA_ERROR_INVALID_ACE_LABEL" },
        { "xn--0.pt", "B",  // invalid Punycode
          "xn--0\uFFFD.pt", "UIDNA_ERROR_PUNYCODE" },
        { "xn--a.pt", "B",  // U+0080
          "xn--a\uFFFD.pt", "UIDNA_ERROR_INVALID_ACE_LABEL" },
        { "xn--a-\u00C4.pt", "B",  // invalid Punycode
          "xn--a-\u00E4.pt", "UIDNA_ERROR_PUNYCODE" },
        { "\u65E5\u672C\u8A9E\u3002\uFF2A\uFF30", "B",  // Japanese with fullwidth ".jp"
          "\u65E5\u672C\u8A9E.jp", "" },
        { "\u2615", "B", "\u2615", "" },  // Unicode 4.0 HOT BEVERAGE
        // some characters are disallowed because they are canonically equivalent
        // to sequences with non-LDH ASCII
        { "a\u2260b\u226Ec\u226Fd", "B",
          "a\uFFFDb\uFFFDc\uFFFDd", "UIDNA_ERROR_DISALLOWED" },
        // many deviation characters, test the special mapping code
        { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
          "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "N",
          "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
          "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz",
          "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_CONTEXTJ" },
        { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
          "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
          "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "T",
          "1.assbcssssssssd"+
          "\u03C3\u03C3sssssssssssssssse"+
          "ssssssssssssssssssssx"+
          "ssssssssssssssssssssy"+
          "sssssssssssssss\u015Dssz", "UIDNA_ERROR_LABEL_TOO_LONG" },
        // "xn--bss" with deviation characters
        { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "N",
          "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "UIDNA_ERROR_CONTEXTJ" },
        { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "T",
          "\u5919", "" },
        // "xn--bssffl" written as:
        // 02E3 MODIFIER LETTER SMALL X
        // 034F COMBINING GRAPHEME JOINER (ignored)
        // 2115 DOUBLE-STRUCK CAPITAL N
        // 200B ZERO WIDTH SPACE (ignored)
        // FE63 SMALL HYPHEN-MINUS
        // 00AD SOFT HYPHEN (ignored)
        // FF0D FULLWIDTH HYPHEN-MINUS
        // 180C MONGOLIAN FREE VARIATION SELECTOR TWO (ignored)
        // 212C SCRIPT CAPITAL B
        // FE00 VARIATION SELECTOR-1 (ignored)
        // 017F LATIN SMALL LETTER LONG S
        // 2064 INVISIBLE PLUS (ignored)
        // 1D530 MATHEMATICAL FRAKTUR SMALL S
        // E01EF VARIATION SELECTOR-256 (ignored)
        // FB04 LATIN SMALL LIGATURE FFL
        { "\u02E3\u034F\u2115\u200B\uFE63\u00AD\uFF0D\u180C"+
          "\u212C\uFE00\u017F\u2064"+"\uD835\uDD30\uDB40\uDDEF"/*1D530 E01EF*/+"\uFB04", "B",
          "\u5921\u591E\u591C\u5919", "" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901", "" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901.", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901.", "" },
        // Domain name >256 characters, forces slow path in UTF-8 processing.
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "12345678901234567890123456789012345678901234567890123456789012", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "12345678901234567890123456789012345678901234567890123456789012",
          "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789\u05D0", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789\u05D0",
          "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG|UIDNA_ERROR_BIDI" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901234."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901234."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890",
          "UIDNA_ERROR_LABEL_TOO_LONG" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901234."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890.", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901234."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890.",
          "UIDNA_ERROR_LABEL_TOO_LONG" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901234."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901234."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901",
          "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
        // label length 63: xn--1234567890123456789012345678901234567890123456789012345-9te
        { "\u00E41234567890123456789012345678901234567890123456789012345", "B",
          "\u00E41234567890123456789012345678901234567890123456789012345", "" },
        { "1234567890\u00E41234567890123456789012345678901234567890123456", "B",
          "1234567890\u00E41234567890123456789012345678901234567890123456", "UIDNA_ERROR_LABEL_TOO_LONG" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E4123456789012345678901234567890123456789012345."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E4123456789012345678901234567890123456789012345."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901", "" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E4123456789012345678901234567890123456789012345."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901.", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E4123456789012345678901234567890123456789012345."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901.", "" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E4123456789012345678901234567890123456789012345."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "12345678901234567890123456789012345678901234567890123456789012", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E4123456789012345678901234567890123456789012345."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "12345678901234567890123456789012345678901234567890123456789012",
          "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E41234567890123456789012345678901234567890123456."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E41234567890123456789012345678901234567890123456."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890",
          "UIDNA_ERROR_LABEL_TOO_LONG" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E41234567890123456789012345678901234567890123456."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890.", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E41234567890123456789012345678901234567890123456."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "123456789012345678901234567890123456789012345678901234567890.",
          "UIDNA_ERROR_LABEL_TOO_LONG" },
        { "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E41234567890123456789012345678901234567890123456."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901", "B",
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890\u00E41234567890123456789012345678901234567890123456."+
          "123456789012345678901234567890123456789012345678901234567890123."+
          "1234567890123456789012345678901234567890123456789012345678901",
          "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
        // hyphen errors and empty-label errors
        // Ticket #10883: ToUnicode also checks for empty labels.
        { ".", "B", ".", "UIDNA_ERROR_EMPTY_LABEL" },
        { "\uFF0E", "B", ".", "UIDNA_ERROR_EMPTY_LABEL" },
        // "xn---q----jra"=="-q--a-umlaut-"
        { "a.b..-q--a-.e", "B", "a.b..-q--a-.e",
          "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
          "UIDNA_ERROR_HYPHEN_3_4" },
        { "a.b..-q--\u00E4-.e", "B", "a.b..-q--\u00E4-.e",
          "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
          "UIDNA_ERROR_HYPHEN_3_4" },
        { "a.b..xn---q----jra.e", "B", "a.b..-q--\u00E4-.e",
          "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
          "UIDNA_ERROR_HYPHEN_3_4" },
        { "a..c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
        { "a.xn--.c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
        { "a.-b.", "B", "a.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
        { "a.b-.c", "B", "a.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
        { "a.-.c", "B", "a.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
        { "a.bc--de.f", "B", "a.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
        { "\u00E4.\u00AD.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
        { "\u00E4.xn--.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
        { "\u00E4.-b.", "B", "\u00E4.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
        { "\u00E4.b-.c", "B", "\u00E4.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
        { "\u00E4.-.c", "B", "\u00E4.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
        { "\u00E4.bc--de.f", "B", "\u00E4.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
        { "a.b.\u0308c.d", "B", "a.b.\uFFFDc.d", "UIDNA_ERROR_LEADING_COMBINING_MARK" },
        { "a.b.xn--c-bcb.d", "B",
          "a.b.xn--c-bcb\uFFFD.d", "UIDNA_ERROR_LEADING_COMBINING_MARK|UIDNA_ERROR_INVALID_ACE_LABEL" },
        // BiDi
        { "A0", "B", "a0", "" },
        { "0A", "B", "0a", "" },  // all-LTR is ok to start with a digit (EN)
        { "0A.\u05D0", "B",  // ASCII label does not start with L/R/AL
          "0a.\u05D0", "UIDNA_ERROR_BIDI" },
        { "c.xn--0-eha.xn--4db", "B",  // 2nd label does not start with L/R/AL
          "c.0\u00FC.\u05D0", "UIDNA_ERROR_BIDI" },
        { "b-.\u05D0", "B",  // label does not end with L/EN
          "b-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
        { "d.xn----dha.xn--4db", "B",  // 2nd label does not end with L/EN
          "d.\u00FC-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
        { "a\u05D0", "B", "a\u05D0", "UIDNA_ERROR_BIDI" },  // first dir != last dir
        { "\u05D0\u05C7", "B", "\u05D0\u05C7", "" },
        { "\u05D09\u05C7", "B", "\u05D09\u05C7", "" },
        { "\u05D0a\u05C7", "B", "\u05D0a\u05C7", "UIDNA_ERROR_BIDI" },  // first dir != last dir
        { "\u05D0\u05EA", "B", "\u05D0\u05EA", "" },
        { "\u05D0\u05F3\u05EA", "B", "\u05D0\u05F3\u05EA", "" },
        { "a\u05D0Tz", "B", "a\u05D0tz", "UIDNA_ERROR_BIDI" },  // mixed dir
        { "\u05D0T\u05EA", "B", "\u05D0t\u05EA", "UIDNA_ERROR_BIDI" },  // mixed dir
        { "\u05D07\u05EA", "B", "\u05D07\u05EA", "" },
        { "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" },  // Arabic 7 in the middle
        { "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" },  // AN digit in LTR
        { "\u05D07\u0667\u05EA", "B",  // mixed EN/AN digits in RTL
          "\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" },
        // ZWJ
        { "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" },  // Virama+ZWJ
        { "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
        { "\u200D", "N", "\u200D", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
        // ZWNJ
        { "\u0BB9\u0BCD\u200C", "N", "\u0BB9\u0BCD\u200C", "" },  // Virama+ZWNJ
        { "\u0BB9\u200C", "N", "\u0BB9\u200C", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
        { "\u200C", "N", "\u200C", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
        { "\u0644\u0670\u200C\u06ED\u06EF", "N",  // Joining types D T ZWNJ T R
          "\u0644\u0670\u200C\u06ED\u06EF", "" },
        { "\u0644\u0670\u200C\u06EF", "N",  // D T ZWNJ R
          "\u0644\u0670\u200C\u06EF", "" },
        { "\u0644\u200C\u06ED\u06EF", "N",  // D ZWNJ T R
          "\u0644\u200C\u06ED\u06EF", "" },
        { "\u0644\u200C\u06EF", "N",  // D ZWNJ R
          "\u0644\u200C\u06EF", "" },
        { "\u0644\u0670\u200C\u06ED", "N",  // D T ZWNJ T
          "\u0644\u0670\u200C\u06ED", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
        { "\u06EF\u200C\u06EF", "N",  // R ZWNJ R
          "\u06EF\u200C\u06EF", "UIDNA_ERROR_CONTEXTJ" },
        { "\u0644\u200C", "N",  // D ZWNJ
          "\u0644\u200C", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
        { "\u0660\u0661", "B",  // Arabic-Indic Digits alone
          "\u0660\u0661", "UIDNA_ERROR_BIDI" },
        { "\u06F0\u06F1", "B",  // Extended Arabic-Indic Digits alone
          "\u06F0\u06F1", "" },
        { "\u0660\u06F1", "B",  // Mixed Arabic-Indic Digits
          "\u0660\u06F1", "UIDNA_ERROR_CONTEXTO_DIGITS|UIDNA_ERROR_BIDI" },
        // All of the CONTEXTO "Would otherwise have been DISALLOWED" characters
        // in their correct contexts,
        // then each in incorrect context.
        { "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "B",
          "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "UIDNA_ERROR_BIDI" },
        { "l\u00B7", "B",
          "l\u00B7", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
        { "\u00B7l", "B",
          "\u00B7l", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
        { "\u0375", "B",
          "\u0375", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
        { "\u03B1\u05F3", "B",
          "\u03B1\u05F3", "UIDNA_ERROR_CONTEXTO_PUNCTUATION|UIDNA_ERROR_BIDI" },
        { "\u05F4", "B",
          "\u05F4", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
        { "l\u30FB", "B",
          "l\u30FB", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
        // { "", "B",
        //   "", "" },
    };

    /**
     * Runs every row of {@link #testCases} through both IDNA instances.
     *
     * <p>Buffer naming: a/u = ToASCII/ToUnicode, T/N = transitional/nontransitional,
     * a trailing L = the label (rather than name) function, and a four-letter name
     * such as aTuN = the output of T.nameToASCII fed into N.nameToUnicode.
     *
     * <p>NOTE(review): the per-row checks inside the loop were rebuilt from the
     * buffers and helpers declared in this class; compare against the upstream
     * ICU test before relying on the exact set of checks or message texts.
     */
    public void TestSomeCases() {
        StringBuilder aT=new StringBuilder(), uT=new StringBuilder();
        StringBuilder aN=new StringBuilder(), uN=new StringBuilder();
        IDNA.Info aTInfo=new IDNA.Info(), uTInfo=new IDNA.Info();
        IDNA.Info aNInfo=new IDNA.Info(), uNInfo=new IDNA.Info();

        StringBuilder aTuN=new StringBuilder(), uTaN=new StringBuilder();
        StringBuilder aNuN=new StringBuilder(), uNaN=new StringBuilder();
        IDNA.Info aTuNInfo=new IDNA.Info(), uTaNInfo=new IDNA.Info();
        IDNA.Info aNuNInfo=new IDNA.Info(), uNaNInfo=new IDNA.Info();

        StringBuilder aTL=new StringBuilder(), uTL=new StringBuilder();
        StringBuilder aNL=new StringBuilder(), uNL=new StringBuilder();
        IDNA.Info aTLInfo=new IDNA.Info(), uTLInfo=new IDNA.Info();
        IDNA.Info aNLInfo=new IDNA.Info(), uNLInfo=new IDNA.Info();

        EnumSet<IDNA.Error> uniErrors=EnumSet.noneOf(IDNA.Error.class);

        TestCase testCase=new TestCase();
        int i;
        for(i=0; i<testCases.length; ++i) {
            testCase.set(testCases[i]);
            String input=testCase.s;
            String expected=testCase.u;
            // ToASCII/ToUnicode, transitional/nontransitional
            try {
                trans.nameToASCII(input, aT, aTInfo);
                trans.nameToUnicode(input, uT, uTInfo);
                nontrans.nameToASCII(input, aN, aNInfo);
                nontrans.nameToUnicode(input, uN, uNInfo);
            } catch(Exception e) {
                errln(String.format("first-level processing [%d/%s] %s - %s",
                                    i, testCase.o, testCase.s, e));
                continue;
            }
            // ToUnicode does not set length-overflow errors.
            uniErrors.clear();
            uniErrors.addAll(testCase.errors);
            uniErrors.removeAll(lengthOverflowErrors);
            char mode=testCase.o.charAt(0);
            if(mode=='B' || mode=='N') {
                if(!sameErrors(uNInfo, uniErrors)) {
                    errln(String.format("N.nameToUnicode([%d] %s) unexpected errors %s",
                                        i, testCase.s, uNInfo.getErrors()));
                    continue;
                }
                if(!UTF16Plus.equal(uN, expected)) {
                    errln(String.format("N.nameToUnicode([%d] %s) unexpected string %s",
                                        i, testCase.s, prettify(uN.toString())));
                    continue;
                }
                if(!sameErrors(aNInfo, testCase.errors)) {
                    errln(String.format("N.nameToASCII([%d] %s) unexpected errors %s",
                                        i, testCase.s, aNInfo.getErrors()));
                    continue;
                }
            }
            if(mode=='B' || mode=='T') {
                if(!sameErrors(uTInfo, uniErrors)) {
                    errln(String.format("T.nameToUnicode([%d] %s) unexpected errors %s",
                                        i, testCase.s, uTInfo.getErrors()));
                    continue;
                }
                if(!UTF16Plus.equal(uT, expected)) {
                    errln(String.format("T.nameToUnicode([%d] %s) unexpected string %s",
                                        i, testCase.s, prettify(uT.toString())));
                    continue;
                }
                if(!sameErrors(aTInfo, testCase.errors)) {
                    errln(String.format("T.nameToASCII([%d] %s) unexpected errors %s",
                                        i, testCase.s, aTInfo.getErrors()));
                    continue;
                }
            }
            // ToASCII is all-ASCII if no severe errors
            if(!hasCertainErrors(aNInfo, severeErrors) && !isASCII(aN)) {
                errln(String.format("N.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
                                    i, testCase.s, aNInfo.getErrors(), prettify(aN.toString())));
                continue;
            }
            if(!hasCertainErrors(aTInfo, severeErrors) && !isASCII(aT)) {
                errln(String.format("T.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
                                    i, testCase.s, aTInfo.getErrors(), prettify(aT.toString())));
                continue;
            }
            // second-level processing: round-trip the first-level results
            try {
                nontrans.nameToUnicode(aT, aTuN, aTuNInfo);
                nontrans.nameToASCII(uT, uTaN, uTaNInfo);
                nontrans.nameToUnicode(aN, aNuN, aNuNInfo);
                nontrans.nameToASCII(uN, uNaN, uNaNInfo);
            } catch(Exception e) {
                errln(String.format("second-level processing [%d/%s] %s - %s",
                                    i, testCase.o, testCase.s, e));
                continue;
            }
            if(!UTF16Plus.equal(aN, uNaN)) {
                errln(String.format("N.nameToASCII([%d] %s)!=N.nameToUnicode().N.nameToASCII() "+
                                    "(errors %s) %s vs. %s",
                                    i, testCase.s, aNInfo.getErrors(),
                                    prettify(aN.toString()), prettify(uNaN.toString())));
                continue;
            }
            if(!UTF16Plus.equal(aT, uTaN)) {
                errln(String.format("T.nameToASCII([%d] %s)!=T.nameToUnicode().N.nameToASCII() "+
                                    "(errors %s) %s vs. %s",
                                    i, testCase.s, aNInfo.getErrors(),
                                    prettify(aT.toString()), prettify(uTaN.toString())));
                continue;
            }
            if(!UTF16Plus.equal(uN, aNuN)) {
                errln(String.format("N.nameToUnicode([%d] %s)!=N.nameToASCII().N.nameToUnicode() "+
                                    "(errors %s) %s vs. %s",
                                    i, testCase.s, uNInfo.getErrors(),
                                    prettify(uN.toString()), prettify(aNuN.toString())));
                continue;
            }
            if(!UTF16Plus.equal(uT, aTuN)) {
                errln(String.format("T.nameToUnicode([%d] %s)!=T.nameToASCII().N.nameToUnicode() "+
                                    "(errors %s) %s vs. %s",
                                    i, testCase.s, uNInfo.getErrors(),
                                    prettify(uT.toString()), prettify(aTuN.toString())));
                continue;
            }
            // labelToASCII/labelToUnicode on the same input
            try {
                trans.labelToASCII(input, aTL, aTLInfo);
                trans.labelToUnicode(input, uTL, uTLInfo);
                nontrans.labelToASCII(input, aNL, aNLInfo);
                nontrans.labelToUnicode(input, uNL, uNLInfo);
            } catch(Exception e) {
                errln(String.format("labelToXYZ processing [%d/%s] %s - %s",
                                    i, testCase.o, testCase.s, e));
                continue;
            }
            // Without a dot in the result, the label function must agree with the name function;
            // with a dot, the label function must report LABEL_HAS_DOT.
            if(aN.indexOf(".")<0) {
                if(!UTF16Plus.equal(aN, aNL) || !sameErrors(aNInfo, aNLInfo)) {
                    errln(String.format("N.nameToASCII([%d] %s)!=N.labelToASCII() "+
                                        "(errors %s vs %s) %s vs. %s",
                                        i, testCase.s, aNInfo.getErrors().toString(), aNLInfo.getErrors().toString(),
                                        prettify(aN.toString()), prettify(aNL.toString())));
                    continue;
                }
            } else {
                if(!hasError(aNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
                    errln(String.format("N.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
                                        i, testCase.s, aNLInfo.getErrors()));
                    continue;
                }
            }
            if(aT.indexOf(".")<0) {
                if(!UTF16Plus.equal(aT, aTL) || !sameErrors(aTInfo, aTLInfo)) {
                    errln(String.format("T.nameToASCII([%d] %s)!=T.labelToASCII() "+
                                        "(errors %s vs %s) %s vs. %s",
                                        i, testCase.s, aTInfo.getErrors().toString(), aTLInfo.getErrors().toString(),
                                        prettify(aT.toString()), prettify(aTL.toString())));
                    continue;
                }
            } else {
                if(!hasError(aTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
                    errln(String.format("T.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
                                        i, testCase.s, aTLInfo.getErrors()));
                    continue;
                }
            }
            if(uN.indexOf(".")<0) {
                if(!UTF16Plus.equal(uN, uNL) || !sameErrors(uNInfo, uNLInfo)) {
                    errln(String.format("N.nameToUnicode([%d] %s)!=N.labelToUnicode() "+
                                        "(errors %s vs %s) %s vs. %s",
                                        i, testCase.s, uNInfo.getErrors().toString(), uNLInfo.getErrors().toString(),
                                        prettify(uN.toString()), prettify(uNL.toString())));
                    continue;
                }
            } else {
                if(!hasError(uNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
                    errln(String.format("N.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
                                        i, testCase.s, uNLInfo.getErrors()));
                    continue;
                }
            }
            if(uT.indexOf(".")<0) {
                if(!UTF16Plus.equal(uT, uTL) || !sameErrors(uTInfo, uTLInfo)) {
                    errln(String.format("T.nameToUnicode([%d] %s)!=T.labelToUnicode() "+
                                        "(errors %s vs %s) %s vs. %s",
                                        i, testCase.s, uTInfo.getErrors().toString(), uTLInfo.getErrors().toString(),
                                        prettify(uT.toString()), prettify(uTL.toString())));
                    continue;
                }
            } else {
                if(!hasError(uTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
                    errln(String.format("T.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
                                        i, testCase.s, uTLInfo.getErrors()));
                    continue;
                }
            }
            // Differences between transitional and nontransitional processing
            if(mode=='B') {
                if( aNInfo.isTransitionalDifferent() ||
                    aTInfo.isTransitionalDifferent() ||
                    uNInfo.isTransitionalDifferent() ||
                    uTInfo.isTransitionalDifferent() ||
                    aNLInfo.isTransitionalDifferent() ||
                    aTLInfo.isTransitionalDifferent() ||
                    uNLInfo.isTransitionalDifferent() ||
                    uTLInfo.isTransitionalDifferent()
                ) {
                    errln(String.format("B.process([%d] %s) isTransitionalDifferent()", i, testCase.s));
                    continue;
                }
                if( !UTF16Plus.equal(aN, aT) || !UTF16Plus.equal(uN, uT) ||
                    !UTF16Plus.equal(aNL, aTL) || !UTF16Plus.equal(uNL, uTL) ||
                    !sameErrors(aNInfo, aTInfo) || !sameErrors(uNInfo, uTInfo) ||
                    !sameErrors(aNLInfo, aTLInfo) || !sameErrors(uNLInfo, uTLInfo)
                ) {
                    errln(String.format("N.process([%d] %s) vs. T.process() different errors or result strings",
                                        i, testCase.s));
                    continue;
                }
            } else {
                if( !aNInfo.isTransitionalDifferent() ||
                    !aTInfo.isTransitionalDifferent() ||
                    !uNInfo.isTransitionalDifferent() ||
                    !uTInfo.isTransitionalDifferent() ||
                    !aNLInfo.isTransitionalDifferent() ||
                    !aTLInfo.isTransitionalDifferent() ||
                    !uNLInfo.isTransitionalDifferent() ||
                    !uTLInfo.isTransitionalDifferent()
                ) {
                    errln(String.format("%s.process([%d] %s) !isTransitionalDifferent()",
                                        testCase.o, i, testCase.s));
                    continue;
                }
                if( UTF16Plus.equal(aN, aT) || UTF16Plus.equal(uN, uT) ||
                    UTF16Plus.equal(aNL, aTL) || UTF16Plus.equal(uNL, uTL)
                ) {
                    errln(String.format("N.process([%d] %s) vs. T.process() same result strings",
                                        i, testCase.s));
                    continue;
                }
            }
        }
    }

    // Transitional and nontransitional UTS #46 instances; assigned once in the constructor.
    private final IDNA trans, nontrans;

    // Errors after which a ToASCII result is not required to be all-ASCII.
    private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of(
        IDNA.Error.LEADING_COMBINING_MARK,
        IDNA.Error.DISALLOWED,
        IDNA.Error.PUNYCODE,
        IDNA.Error.LABEL_HAS_DOT,
        IDNA.Error.INVALID_ACE_LABEL);
    // Length errors, which are removed from the expected set for ToUnicode results.
    private static final EnumSet<IDNA.Error> lengthOverflowErrors=EnumSet.of(
        IDNA.Error.LABEL_TOO_LONG,
        IDNA.Error.DOMAIN_NAME_TOO_LONG);

    /** Returns true if info reports the given error. */
    private boolean hasError(IDNA.Info info, IDNA.Error error) {
        return info.getErrors().contains(error);
    }
    /**
     * Returns true if errors and certainErrors have at least one element in common.
     * Assumes that certainErrors is not empty.
     */
    private boolean hasCertainErrors(Set<IDNA.Error> errors, Set<IDNA.Error> certainErrors) {
        return !errors.isEmpty() && !Collections.disjoint(errors, certainErrors);
    }
    /** Same as the Set overload, applied to the errors reported by info. */
    private boolean hasCertainErrors(IDNA.Info info, Set<IDNA.Error> certainErrors) {
        return hasCertainErrors(info.getErrors(), certainErrors);
    }
    /** Returns true if both sets contain exactly the same errors. */
    private boolean sameErrors(Set<IDNA.Error> a, Set<IDNA.Error> b) {
        return a.equals(b);
    }
    /** Returns true if both Info objects report exactly the same errors. */
    private boolean sameErrors(IDNA.Info a, IDNA.Info b) {
        return sameErrors(a.getErrors(), b.getErrors());
    }
    /** Returns true if the Info object reports exactly the errors in b. */
    private boolean sameErrors(IDNA.Info a, Set<IDNA.Error> b) {
        return sameErrors(a.getErrors(), b);
    }

    /**
     * Returns true if every char of str is below 0x80 (also true for an empty sequence).
     */
    private static boolean isASCII(CharSequence str) {
        int length=str.length();
        for(int i=0; i<length; ++i) {
            if(str.charAt(i)>=0x80) {
                return false;
            }
        }
        return true;
    }
}