/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
*/
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.EnumSet;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacterCategory;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacterDirection;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.IDNA;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer2;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.StringPrepParseException;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUException;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
// Note about tests for IDNA.Error.DOMAIN_NAME_TOO_LONG:
//
// The domain name length limit is 255 octets in an internal DNS representation
// where the last ("root") label is the empty label
// represented by length byte 0 alone.
// In a conventional string, this translates to 253 characters, or 254
// if there is a trailing dot for the root label.
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * UTS #46 (IDNA2008) implementation.
 * @author Markus Scherer
 * @since 2010jul09
 */
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class UTS46 extends IDNA {
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public UTS46(int options) {
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.options=options;
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info) {
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return process(label, true, true, dest, info);
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info) {
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return process(label, true, false, dest, info);
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info) {
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        process(name, false, true, dest, info);
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if( dest.length()>=254 && !info.getErrors().contains(Error.DOMAIN_NAME_TOO_LONG) &&
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isASCIIString(dest) &&
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (dest.length()>254 || dest.charAt(253)!='.')
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ) {
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addError(info, Error.DOMAIN_NAME_TOO_LONG);
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info) {
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return process(name, false, false, dest, info);
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final Normalizer2 uts46Norm2=
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer2.getInstance(null, "uts46", Normalizer2.Mode.COMPOSE);  // uts46.nrm
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    final int options;
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Severe errors which usually result in a U+FFFD replacement character in the result string.
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final EnumSet<Error> severeErrors=EnumSet.of(
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Error.LEADING_COMBINING_MARK,
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Error.DISALLOWED,
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Error.PUNYCODE,
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Error.LABEL_HAS_DOT,
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Error.INVALID_ACE_LABEL);
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    isASCIIString(CharSequence dest) {
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int length=dest.length();
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i=0; i<length; ++i) {
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(dest.charAt(i)>0x7f) {
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // UTS #46 data for ASCII characters.
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // and passes through all other ASCII characters.
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // If USE_STD3_RULES is set, then non-LDH characters are disallowed
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // using this data.
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The ASCII fastpath also uses this data.
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Values: -1=disallowed  0==valid  1==mapped (lowercase)
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final byte asciiData[]={
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 002D..002E; valid  #  HYPHEN-MINUS..FULL STOP
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1,
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 0030..0039; valid  #  DIGIT ZERO..DIGIT NINE
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1,
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 0041..005A; mapped  #  LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1,
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 0061..007A; valid  #  LATIN SMALL LETTER A..LATIN SMALL LETTER Z
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    };
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    process(CharSequence src,
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean isLabel, boolean toASCII,
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder dest,
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Info info) {
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // uts46Norm2.normalize() would do all of this error checking and setup,
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // but with the ASCII fastpath we do not always call it, and do not
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // call it first.
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(dest==src) {
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Arguments are fine, reset output values.
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dest.delete(0, 0x7fffffff);
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        resetInfo(info);
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int srcLength=src.length();
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(srcLength==0) {
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addError(info, Error.EMPTY_LABEL);
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return dest;
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ASCII fastpath
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean disallowNonLDHDot=(options&USE_STD3_RULES)!=0;
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int labelStart=0;
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i;
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(i=0;; ++i) {
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i==srcLength) {
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(toASCII) {
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if((i-labelStart)>63) {
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.LABEL_TOO_LONG);
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // There is a trailing dot if labelStart==i.
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addError(info, Error.DOMAIN_NAME_TOO_LONG);
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                promoteAndResetLabelErrors(info);
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return dest;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c=src.charAt(i);
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c>0x7f) {
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cData=asciiData[c];
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(cData>0) {
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.append((char)(c+0x20));  // Lowercase an uppercase ASCII letter.
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(cData<0 && disallowNonLDHDot) {
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;  // Replacing with U+FFFD can be complicated for toASCII.
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.append(c);
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c=='-') {  // hyphen
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(i==(labelStart+3) && src.charAt(i-1)=='-') {
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // "??--..." is Punycode or forbidden.
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++i;  // '-' was copied to dest already
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(i==labelStart) {
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // label starts with "-"
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.LEADING_HYPHEN);
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if((i+1)==srcLength || src.charAt(i+1)=='.') {
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // label ends with "-"
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.TRAILING_HYPHEN);
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(c=='.') {  // dot
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(isLabel) {
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Replacing with U+FFFD can be complicated for toASCII.
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++i;  // '.' was copied to dest already
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(i==labelStart) {
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.EMPTY_LABEL);
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(toASCII && (i-labelStart)>63) {
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.LABEL_TOO_LONG);
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    promoteAndResetLabelErrors(info);
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    labelStart=i+1;
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        promoteAndResetLabelErrors(info);
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        processUnicode(src, labelStart, i, isLabel, toASCII, dest, info);
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if( isBiDi(info) && !hasCertainErrors(info, severeErrors) &&
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (!isOkBiDi(info) || (labelStart>0 && !isASCIIOkBiDi(dest, labelStart)))
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ) {
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addError(info, Error.BIDI);
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    processUnicode(CharSequence src,
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   int labelStart, int mappingStart,
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   boolean isLabel, boolean toASCII,
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   StringBuilder dest,
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   Info info) {
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(mappingStart==0) {
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            uts46Norm2.normalize(src, dest);
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            uts46Norm2.normalizeSecondAndAppend(dest, src.subSequence(mappingStart, src.length()));
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean doMapDevChars=
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            toASCII ? (options&NONTRANSITIONAL_TO_ASCII)==0 :
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                      (options&NONTRANSITIONAL_TO_UNICODE)==0;
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int destLength=dest.length();
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int labelLimit=labelStart;
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(labelLimit<destLength) {
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c=dest.charAt(labelLimit);
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c=='.' && !isLabel) {
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int labelLength=labelLimit-labelStart;
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int newLength=processLabel(dest, labelStart, labelLength,
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                toASCII, info);
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                promoteAndResetLabelErrors(info);
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                destLength+=newLength-labelLength;
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                labelLimit=labelStart+=newLength+1;
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) {
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setTransitionalDifferent(info);
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(doMapDevChars) {
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    destLength=mapDevChars(dest, labelStart, labelLimit);
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Do not increment labelLimit in case c was removed.
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // All deviation characters have been mapped, no need to check for them again.
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    doMapDevChars=false;
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++labelLimit;
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++labelLimit;
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok)
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // but not an empty label elsewhere nor a completely empty domain name.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(0==labelStart || labelStart<labelLimit) {
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            processLabel(dest, labelStart, labelLimit-labelStart, toASCII, info);
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            promoteAndResetLabelErrors(info);
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // returns the new dest.length()
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    mapDevChars(StringBuilder dest, int labelStart, int mappingStart) {
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int length=dest.length();
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean didMapDevChars=false;
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i=mappingStart; i<length;) {
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c=dest.charAt(i);
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            switch(c) {
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0xdf:
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Map sharp s to ss.
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                didMapDevChars=true;
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.setCharAt(i++, 's');
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.insert(i++, 's');
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++length;
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x3c2:  // Map final sigma to nonfinal sigma.
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                didMapDevChars=true;
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.setCharAt(i++, '\u03c3');
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x200c:  // Ignore/remove ZWNJ.
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case 0x200d:  // Ignore/remove ZWJ.
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                didMapDevChars=true;
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dest.delete(i, i+1);
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --length;
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            default:
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++i;
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(didMapDevChars) {
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Mapping deviation characters might have resulted in an un-NFC string.
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We could use either the NFC or the UTS #46 normalizer.
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file.
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String normalized=uts46Norm2.normalize(dest.subSequence(labelStart, dest.length()));
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dest.replace(labelStart, 0x7fffffff, normalized);
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return dest.length();
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return length;
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Some non-ASCII characters are equivalent to sequences with
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // non-LDH ASCII characters. To find them:
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    isNonASCIIDisallowedSTD3Valid(int c) {
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c==0x2260 || c==0x226E || c==0x226F;
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Replace the label in dest with the label string, if the label was modified.
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // If label==dest then the label was modified in-place and labelLength
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // is the new label length, different from label.length().
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // If label!=dest then labelLength==label.length().
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Returns labelLength (= the new label length).
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    replaceLabel(StringBuilder dest, int destLabelStart, int destLabelLength,
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 CharSequence label, int labelLength) {
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(label!=dest) {
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dest.delete(destLabelStart, destLabelStart+destLabelLength).insert(destLabelStart, label);
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // or dest.replace(destLabelStart, destLabelStart+destLabelLength, label.toString());
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // which would create a String rather than moving characters in the StringBuilder.
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return labelLength;
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // returns the new label length
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    processLabel(StringBuilder dest,
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 int labelStart, int labelLength,
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 boolean toASCII,
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 Info info) {
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder fromPunycode;
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder labelString;
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int destLabelStart=labelStart;
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int destLabelLength=labelLength;
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean wasPunycode;
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if( labelLength>=4 &&
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dest.charAt(labelStart)=='x' && dest.charAt(labelStart+1)=='n' &&
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dest.charAt(labelStart+2)=='-' && dest.charAt(labelStart+3)=='-'
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ) {
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Label starts with "xn--", try to un-Punycode it.
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            wasPunycode=true;
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fromPunycode=Punycode.decode(dest.subSequence(labelStart+4, labelStart+labelLength), null);
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } catch (StringPrepParseException e) {
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addLabelError(info, Error.PUNYCODE);
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check for NFC, and for characters that are not
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // valid or deviation characters according to the normalizer.
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // If there is something wrong, then the string will change.
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Note that the normalizer passes through non-LDH ASCII and deviation characters.
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Deviation characters are ok in Punycode even in transitional processing.
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean isValid=uts46Norm2.isNormalized(fromPunycode);
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!isValid) {
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addLabelError(info, Error.INVALID_ACE_LABEL);
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            labelString=fromPunycode;
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            labelStart=0;
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            labelLength=fromPunycode.length();
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            wasPunycode=false;
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            labelString=dest;
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Validity check
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(labelLength==0) {
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addLabelError(info, Error.EMPTY_LABEL);
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return replaceLabel(dest, destLabelStart, destLabelLength, labelString, labelLength);
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // labelLength>0
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(labelLength>=4 && labelString.charAt(labelStart+2)=='-' && labelString.charAt(labelStart+3)=='-') {
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // label starts with "??--"
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addLabelError(info, Error.HYPHEN_3_4);
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(labelString.charAt(labelStart)=='-') {
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // label starts with "-"
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addLabelError(info, Error.LEADING_HYPHEN);
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(labelString.charAt(labelStart+labelLength-1)=='-') {
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // label ends with "-"
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addLabelError(info, Error.TRAILING_HYPHEN);
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If the label was not a Punycode label, then it was the result of
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // mapping, normalization and label segmentation.
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If the label was in Punycode, then we mapped it again above
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and checked its validity.
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Now we handle the STD3 restriction to LDH characters (if set)
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and we look for U+FFFD which indicates disallowed characters
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // in a non-Punycode label or U+FFFD itself in a Punycode label.
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We also check for dots which can come from the input to a single-label function.
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Ok to cast away const because we own the UnicodeString.
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i=labelStart;
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limit=labelStart+labelLength;
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char oredChars=0;
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean disallowNonLDHDot=(options&USE_STD3_RULES)!=0;
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c=labelString.charAt(i);
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<=0x7f) {
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c=='.') {
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    addLabelError(info, Error.LABEL_HAS_DOT);
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    labelString.setCharAt(i, '\ufffd');
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(disallowNonLDHDot && asciiData[c]<0) {
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    addLabelError(info, Error.DISALLOWED);
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    labelString.setCharAt(i, '\ufffd');
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                oredChars|=c;
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    addLabelError(info, Error.DISALLOWED);
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    labelString.setCharAt(i, '\ufffd');
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(c==0xfffd) {
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    addLabelError(info, Error.DISALLOWED);
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++i;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } while(i<limit);
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Check for a leading combining mark after other validity checks
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // so that we don't report IDNA.Error.DISALLOWED for the U+FFFD from here.
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c=labelString.codePointAt(labelStart);
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) {
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addLabelError(info, Error.LEADING_COMBINING_MARK);
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            labelString.setCharAt(labelStart, '\ufffd');
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c>0xffff) {
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Remove c's trail surrogate.
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                labelString.deleteCharAt(labelStart+1);
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                --labelLength;
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(labelString==dest) {
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    --destLabelLength;
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!hasCertainLabelErrors(info, severeErrors)) {
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Do contextual checks only if we do not have U+FFFD from a severe error
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // because U+FFFD can make these checks fail.
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((options&CHECK_BIDI)!=0 && (!isBiDi(info) || isOkBiDi(info))) {
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkLabelBiDi(labelString, labelStart, labelLength, info);
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if( (options&CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c &&
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                !isLabelOkContextJ(labelString, labelStart, labelLength)
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ) {
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addLabelError(info, Error.CONTEXTJ);
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((options&CHECK_CONTEXTO)!=0 && oredChars>=0xb7) {
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                checkLabelContextO(labelString, labelStart, labelLength, info);
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(toASCII) {
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(wasPunycode) {
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Leave a Punycode label unchanged if it has no severe errors.
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(destLabelLength>63) {
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.LABEL_TOO_LONG);
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return destLabelLength;
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(oredChars>=0x80) {
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Contains non-ASCII characters.
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    StringBuilder punycode;
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    try {
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        punycode=Punycode.encode(labelString.subSequence(labelStart, labelStart+labelLength), null);
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } catch (StringPrepParseException e) {
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        throw new ICUException(e);  // unexpected
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    punycode.insert(0, "xn--");
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(punycode.length()>63) {
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.LABEL_TOO_LONG);
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return replaceLabel(dest, destLabelStart, destLabelLength,
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        punycode, punycode.length());
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // all-ASCII label
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(labelLength>63) {
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.LABEL_TOO_LONG);
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // If a Punycode label has severe errors,
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // then leave it but make sure it does not look valid.
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(wasPunycode) {
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addLabelError(info, Error.INVALID_ACE_LABEL);
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info);
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return replaceLabel(dest, destLabelStart, destLabelLength, labelString, labelLength);
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    markBadACELabel(StringBuilder dest,
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int labelStart, int labelLength,
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    boolean toASCII, Info info) {
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean disallowNonLDHDot=(options&USE_STD3_RULES)!=0;
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isASCII=true;
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean onlyLDH=true;
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i=labelStart+4;  // After the initial "xn--".
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limit=labelStart+labelLength;
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c=dest.charAt(i);
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<=0x7f) {
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c=='.') {
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    addLabelError(info, Error.LABEL_HAS_DOT);
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    dest.setCharAt(i, '\ufffd');
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    isASCII=onlyLDH=false;
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(asciiData[c]<0) {
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    onlyLDH=false;
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(disallowNonLDHDot) {
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        dest.setCharAt(i, '\ufffd');
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        isASCII=false;
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                isASCII=onlyLDH=false;
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } while(++i<limit);
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(onlyLDH) {
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dest.insert(labelStart+labelLength, '\ufffd');
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++labelLength;
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(toASCII && isASCII && labelLength>63) {
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addLabelError(info, Error.LABEL_TOO_LONG);
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return labelLength;
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int L_MASK=U_MASK(UCharacterDirection.LEFT_TO_RIGHT);
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int R_AL_MASK=
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.RIGHT_TO_LEFT)|
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.RIGHT_TO_LEFT_ARABIC);
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int L_R_AL_MASK=L_MASK|R_AL_MASK;
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int R_AL_AN_MASK=R_AL_MASK|U_MASK(UCharacterDirection.ARABIC_NUMBER);
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int EN_AN_MASK=
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.EUROPEAN_NUMBER)|
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.ARABIC_NUMBER);
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK;
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int L_EN_MASK=L_MASK|U_MASK(UCharacterDirection.EUROPEAN_NUMBER);
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int ES_CS_ET_ON_BN_NSM_MASK=
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.EUROPEAN_NUMBER_SEPARATOR)|
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.COMMON_NUMBER_SEPARATOR)|
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR)|
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.OTHER_NEUTRAL)|
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.BOUNDARY_NEUTRAL)|
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterDirection.DIR_NON_SPACING_MARK);
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK;
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK;
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // We scan the whole label and check both for whether it contains RTL characters
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // and whether it passes the BiDi Rule.
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // In a BiDi domain name, all labels must pass the BiDi Rule, but we might find
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // that a domain name is a BiDi domain name (has an RTL label) only after
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // processing several earlier labels.
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    checkLabelBiDi(CharSequence label, int labelStart, int labelLength, Info info) {
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // IDNA2008 BiDi rule
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the directionality of the first character.
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i=labelStart;
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c=Character.codePointAt(label, i);
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        i+=Character.charCount(c);
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstMask=U_MASK(UBiDiProps.INSTANCE.getClass(c));
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 1. The first character must be a character with BIDI property L, R
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // or AL.  If it has the R or AL property, it is an RTL label; if it
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // has the L property, it is an LTR label.
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((firstMask&~L_R_AL_MASK)!=0) {
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setNotOkBiDi(info);
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the directionality of the last non-NSM character.
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lastMask;
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int labelLimit=labelStart+labelLength;
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i>=labelLimit) {
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastMask=firstMask;
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=Character.codePointBefore(label, labelLimit);
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            labelLimit-=Character.charCount(c);
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int dir=UBiDiProps.INSTANCE.getClass(c);
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(dir!=UCharacterDirection.DIR_NON_SPACING_MARK) {
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastMask=U_MASK(dir);
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 3. In an RTL label, the end of the label must be a character with
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // BIDI property R, AL, EN or AN, followed by zero or more
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // characters with BIDI property NSM.
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 6. In an LTR label, the end of the label must be a character with
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // BIDI property L or EN, followed by zero or more characters with
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // BIDI property NSM.
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if( (firstMask&L_MASK)!=0 ?
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (lastMask&~L_EN_MASK)!=0 :
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (lastMask&~R_AL_EN_AN_MASK)!=0
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ) {
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setNotOkBiDi(info);
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the directionalities of the intervening characters.
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int mask=0;
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(i<labelLimit) {
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=Character.codePointAt(label, i);
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i+=Character.charCount(c);
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            mask|=U_MASK(UBiDiProps.INSTANCE.getClass(c));
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((firstMask&L_MASK)!=0) {
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // 5. In an LTR label, only characters with the BIDI properties L, EN,
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // ES, CS, ET, ON, BN and NSM are allowed.
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setNotOkBiDi(info);
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // 2. In an RTL label, only characters with the BIDI properties R, AL,
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // AN, EN, ES, CS, ET, ON, BN and NSM are allowed.
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setNotOkBiDi(info);
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // 4. In an RTL label, if an EN is present, no AN may be present, and
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // vice versa.
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((mask&EN_AN_MASK)==EN_AN_MASK) {
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setNotOkBiDi(info);
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // An RTL label is a label that contains at least one character of type
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // R, AL or AN. [...]
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // A "BIDI domain name" is a domain name that contains at least one RTL
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // label. [...]
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The following rule, consisting of six conditions, applies to labels
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // in BIDI domain names.
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) {
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setBiDi(info);
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Special code for the ASCII prefix of a BiDi domain name.
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The ASCII prefix is all-LTR.
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // IDNA2008 BiDi rule, parts relevant to ASCII labels:
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // 1. The first character must be a character with BIDI property L [...]
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // 5. In an LTR label, only characters with the BIDI properties L, EN,
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ES, CS, ET, ON, BN and NSM are allowed.
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // 6. In an LTR label, the end of the label must be a character with
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // BIDI property L or EN [...]
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // UTF-16 version, called for mapped ASCII prefix.
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Cannot contain uppercase A-Z.
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // s[length-1] must be the trailing dot.
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    isASCIIOkBiDi(CharSequence s, int length) {
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int labelStart=0;
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i=0; i<length; ++i) {
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c=s.charAt(i);
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c=='.') {  // dot
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(i>labelStart) {
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c=s.charAt(i-1);
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(!('a'<=c && c<='z') && !('0'<=c && c<='9')) {
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Last character in the label is not an L or EN.
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                labelStart=i+1;
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(i==labelStart) {
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!('a'<=c && c<='z')) {
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // First character in the label is not an L.
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) {
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Intermediate character in the label is a B, S or WS.
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    isLabelOkContextJ(CharSequence label, int labelStart, int labelLength) {
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // [IDNA2008-Tables]
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 200C..200D  ; CONTEXTJ    # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int labelLimit=labelStart+labelLength;
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i=labelStart; i<labelLimit; ++i) {
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(label.charAt(i)==0x200c) {
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Appendix A.1. ZERO WIDTH NON-JOINER
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Rule Set:
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  False;
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //     (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(i==labelStart) {
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c;
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int j=i;
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=Character.codePointBefore(label, j);
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                j-=Character.charCount(c);
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(uts46Norm2.getCombiningClass(c)==9) {
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // check precontext (Joining_Type:{L,D})(Joining_Type:T)*
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(;;) {
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* UJoiningType */ int type=UBiDiProps.INSTANCE.getJoiningType(c);
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(type==UCharacter.JoiningType.TRANSPARENT) {
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(j==0) {
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            return false;
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c=Character.codePointBefore(label, j);
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        j-=Character.charCount(c);
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(type==UCharacter.JoiningType.LEFT_JOINING || type==UCharacter.JoiningType.DUAL_JOINING) {
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;  // precontext fulfilled
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // check postcontext (Joining_Type:T)*(Joining_Type:{R,D})
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(j=i+1;;) {
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(j==labelLimit) {
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c=Character.codePointAt(label, j);
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    j+=Character.charCount(c);
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /* UJoiningType */ int type=UBiDiProps.INSTANCE.getJoiningType(c);
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(type==UCharacter.JoiningType.TRANSPARENT) {
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // just skip this character
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(type==UCharacter.JoiningType.RIGHT_JOINING || type==UCharacter.JoiningType.DUAL_JOINING) {
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;  // postcontext fulfilled
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(label.charAt(i)==0x200d) {
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Appendix A.2. ZERO WIDTH JOINER (U+200D)
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Rule Set:
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  False;
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(i==labelStart) {
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c=Character.codePointBefore(label, i);
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(uts46Norm2.getCombiningClass(c)!=9) {
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    checkLabelContextO(CharSequence label, int labelStart, int labelLength, Info info) {
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int labelEnd=labelStart+labelLength-1;  // inclusive
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int arabicDigits=0;  // -1 for 066x, +1 for 06Fx
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i=labelStart; i<=labelEnd; ++i) {
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=label.charAt(i);
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<0xb7) {
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // ASCII fastpath
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(c<=0x6f9) {
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c==0xb7) {
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Appendix A.3. MIDDLE DOT (U+00B7)
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Rule Set:
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  False;
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  If Before(cp) .eq.  U+006C And
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //     After(cp) .eq.  U+006C Then True;
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(!(labelStart<i && label.charAt(i-1)=='l' &&
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         i<labelEnd && label.charAt(i+1)=='l')) {
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(c==0x375) {
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375)
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Rule Set:
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  False;
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  If Script(After(cp)) .eq.  Greek Then True;
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(!(i<labelEnd &&
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         UScript.GREEK==UScript.getScript(Character.codePointAt(label, i+1)))) {
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(c==0x5f3 || c==0x5f4) {
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3)
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Rule Set:
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  False;
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  If Script(Before(cp)) .eq.  Hebrew Then True;
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4)
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Rule Set:
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  False;
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  If Script(Before(cp)) .eq.  Hebrew Then True;
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(!(labelStart<i &&
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         UScript.HEBREW==UScript.getScript(Character.codePointBefore(label, i)))) {
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(0x660<=c /* && c<=0x6f9 */) {
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669)
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Rule Set:
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  True;
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  For All Characters:
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //    If cp .in. 06F0..06F9 Then False;
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  End For;
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9)
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Rule Set:
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  True;
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  For All Characters:
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //    If cp .in. 0660..0669 Then False;
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    //  End For;
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(c<=0x669) {
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(arabicDigits>0) {
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            addLabelError(info, Error.CONTEXTO_DIGITS);
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        arabicDigits=-1;
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(0x6f0<=c) {
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(arabicDigits<0) {
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            addLabelError(info, Error.CONTEXTO_DIGITS);
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        arabicDigits=1;
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(c==0x30fb) {
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB)
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Rule Set:
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  False;
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  For All Characters:
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //    If Script(cp) .in. {Hiragana, Katakana, Han} Then True;
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //  End For;
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(int j=labelStart;; j+=Character.charCount(c)) {
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(j>labelEnd) {
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        addLabelError(info, Error.CONTEXTO_PUNCTUATION);
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c=Character.codePointAt(label, j);
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int script=UScript.getScript(c);
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(script==UScript.HIRAGANA || script==UScript.KATAKANA || script==UScript.HAN) {
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: make public(?) -- in C, these are public in uchar.h
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int U_MASK(int x) {
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 1<<x;
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int U_GET_GC_MASK(int c) {
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (1<<UCharacter.getType(c));
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int U_GC_M_MASK=
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterCategory.NON_SPACING_MARK)|
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterCategory.ENCLOSING_MARK)|
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        U_MASK(UCharacterCategory.COMBINING_SPACING_MARK);
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
838