12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 1996-2015, International Business Machines Corporation and    *
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved.                                                *
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl;
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList;
14495cb271e305cfb399d463f32210a371198f0abfFredrik Roubertimport java.util.Arrays;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Locale;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.regex.Pattern;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Replaceable;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16;
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeMatcher;
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
231537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/**
241537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android
25836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class Utility {
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char APOSTROPHE = '\'';
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char BACKSLASH  = '\\';
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int MAGIC_UNSIGNED = 0x80000000;
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s.
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(Object[] source, Object target) {
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof Object[])) return false;
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Object[] targ = (Object[]) target;
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two int[]s
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(int[] source, Object target) {
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof int[])) return false;
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] targ = (int[]) target;
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two double[]s
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(double[] source, Object target) {
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof double[])) return false;
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        double[] targ = (double[]) target;
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(byte[] source, Object target) {
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof byte[])) return false;
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte[] targ = (byte[]) target;
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(Object source, Object target) {
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // for some reason, the correct arrayEquals is not being called
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // so do it by hand for now.
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof Object[])
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((Object[]) source,target));
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof int[])
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((int[]) source,target));
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof double[])
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((double[]) source, target));
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof byte[])
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((byte[]) source,target));
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return source.equals(target);
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System.
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Object[] target, int targetStart,
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!arrayEquals(source[i],target[i + delta]))
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System.
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char[] target, int targetStart,
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i]!=target[i + delta])
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
132f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    /**
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two int[]s.
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(int[] source, int sourceStart,
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] target, int targetStart,
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i] != target[i + delta])
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two arrays of doubles.
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(double[] source, int sourceStart,
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            double[] target, int targetStart,
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i] != target[i + delta])
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte[] target, int targetStart, int len){
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i] != target[i + delta])
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
181f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * Trivial reference equality.
182f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * This method should help document that we really want == not equals(),
183f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * and to have a single place to suppress warnings from static analysis tools.
184f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     */
185f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    public static final boolean sameObjects(Object a, Object b) {
186f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return a == b;
187f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    }
188f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
189f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    /**
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility. Does null checks on objects, then calls equals.
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean objectEquals(Object a, Object b) {
193f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return a == null ?
194f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                b == null ? true : false :
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    b == null ? false : a.equals(b);
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
197f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility. Does null checks on objects, then calls compare.
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Comparable<T>> int checkCompare(T a, T b) {
202f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return a == null ?
203f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                b == null ? 0 : -1 :
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    b == null ? 1 : a.compareTo(b);
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility. Does null checks on object, then calls hashCode.
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int checkHash(Object a) {
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return a == null ? 0 : a.hashCode();
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
213f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE character is used during run-length encoding.  It signals
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a run of identical chars.
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char ESCAPE = '\uA5A5';
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE_BYTE character is used during run-length encoding.  It signals
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a run of identical bytes.
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final byte ESCAPE_BYTE = (byte)0xA5;
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing an int array.  Use run-length encoding.
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A character represents itself, unless it is the ESCAPE character.  Then
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following notations are possible:
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE ESCAPE   ESCAPE literal
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE n c      n instances of character c
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE, we represent this as:
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   c ESCAPE n-1 c
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE value is chosen so as not to collide with commonly
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(int[] a) {
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        appendInt(buffer, a.length);
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runValue = a[0];
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int s = a[i];
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s == runValue && runLength < 0xFFFF) {
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++runLength;
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, runValue, runLength);
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = s;
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, runValue, runLength);
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing a short array.  Use run-length encoding.
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A character represents itself, unless it is the ESCAPE character.  Then
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following notations are possible:
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE ESCAPE   ESCAPE literal
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE n c      n instances of character c
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE, we represent this as:
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   c ESCAPE n-1 c
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE value is chosen so as not to collide with commonly
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(short[] a) {
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) (a.length >> 16));
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) a.length);
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        short runValue = a[0];
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            short s = a[i];
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s == runValue && runLength < 0xFFFF) ++runLength;
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, runValue, runLength);
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = s;
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, runValue, runLength);
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing a char array.  Use run-length encoding.
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A character represents itself, unless it is the ESCAPE character.  Then
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following notations are possible:
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE ESCAPE   ESCAPE literal
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE n c      n instances of character c
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE, we represent this as:
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   c ESCAPE n-1 c
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE value is chosen so as not to collide with commonly
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(char[] a) {
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) (a.length >> 16));
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) a.length);
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char runValue = a[0];
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char s = a[i];
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s == runValue && runLength < 0xFFFF) ++runLength;
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, (short)runValue, runLength);
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = s;
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, (short)runValue, runLength);
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing a byte array.  Use run-length encoding.
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Two bytes are packed into a single char, with a single extra zero byte at
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the end if needed.  A byte represents itself, unless it is the
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ESCAPE_BYTE.  Then the following notations are possible:
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE_BYTE n b           n instances of byte b
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   b ESCAPE_BYTE n-1 b
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE_BYTE value is chosen so as not to collide with commonly
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(byte[] a) {
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) (a.length >> 16));
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) a.length);
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte runValue = a[0];
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte[] state = new byte[2];
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte b = a[i];
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (b == runValue && runLength < 0xFF) ++runLength;
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, runValue, runLength, state);
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = b;
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, runValue, runLength, state);
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We must save the final byte, if there is one, by padding
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // an extra zero.
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Encode a run, possibly a degenerate run (of < 4 values).
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param length The length of the run; must be > 0 && <= 0xFFFF.
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) {
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (length < 4) {
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int j=0; j<length; ++j) {
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (value == ESCAPE) {
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    appendInt(buffer, value);
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendInt(buffer, value);
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
377f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            if (length == ESCAPE) {
378f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                if (value == ESCAPE) {
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    appendInt(buffer, ESCAPE);
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendInt(buffer, value);
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --length;
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendInt(buffer, ESCAPE);
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendInt(buffer, length);
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendInt(buffer, value); // Don't need to escape this value
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void appendInt(T buffer, int value) {
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append((char)(value >>> 16));
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append((char)(value & 0xFFFF));
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Encode a run, possibly a degenerate run (of < 4 values).
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param length The length of the run; must be > 0 && <= 0xFFFF.
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) {
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
405f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            char valueChar = (char) value;
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (length < 4) {
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int j=0; j<length; ++j) {
408f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    if (valueChar == ESCAPE) {
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(ESCAPE);
410f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    }
411f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    buffer.append(valueChar);
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
415f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                if (length == ESCAPE) {
416f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    if (valueChar == ESCAPE) {
417f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                        buffer.append(ESCAPE);
418f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    }
419f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    buffer.append(valueChar);
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    --length;
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(ESCAPE);
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append((char) length);
424f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                buffer.append(valueChar); // Don't need to escape this value
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Encode a run, possibly a degenerate run (of < 4 values).
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param length The length of the run; must be > 0 && <= 0xFF.
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length,
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte[] state) {
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (length < 4) {
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int j=0; j<length; ++j) {
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendEncodedByte(buffer, value, state);
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
444f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            if ((byte)length == ESCAPE_BYTE) {
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendEncodedByte(buffer, value, state);
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --length;
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendEncodedByte(buffer, ESCAPE_BYTE, state);
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendEncodedByte(buffer, (byte)length, state);
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendEncodedByte(buffer, value, state); // Don't need to escape this value
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append a byte to the given Appendable, packing two bytes into each
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character.  The state parameter maintains intermediary data between
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * calls.
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param state A two-element array, with state[0] == 0 if this is the
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * first byte of a pair, or state[0] != 0 if this is the second byte
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of a pair, in which case state[1] is the first byte.
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value,
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte[] state) {
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (state[0] != 0) {
467f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                char c = (char) ((state[1] << 8) | ((value) & 0xFF));
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(c);
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state[0] = 0;
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state[0] = 1;
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state[1] = value;
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of ints from a run-length encoded string.
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final int[] RLEStringToIntArray(String s) {
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int length = getInt(s, 0);
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] array = new int[length];
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ai = 0, i = 1;
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int maxI = s.length() / 2;
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (ai < length && i < maxI) {
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = getInt(s, i++);
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == ESCAPE) {
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = getInt(s, i++);
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ESCAPE) {
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = c;
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int runLength = c;
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int runValue = getInt(s, i++);
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int j=0; j<runLength; ++j) {
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        array[ai++] = runValue;
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                array[ai++] = c;
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ai != length || i != maxI) {
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded int array");
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final int getInt(String s, int i) {
516f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return ((s.charAt(2*i)) << 16) | s.charAt(2*i+1);
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of shorts from a run-length encoded string.
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final short[] RLEStringToShortArray(String s) {
523f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        short[] array = new short[length];
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ai = 0;
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=2; i<s.length(); ++i) {
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i);
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == ESCAPE) {
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(++i);
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ESCAPE) {
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = (short) c;
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
533f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    int runLength = c;
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    short runValue = (short) s.charAt(++i);
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                array[ai++] = (short) c;
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ai != length)
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded short array");
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of shorts from a run-length encoded string.
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final char[] RLEStringToCharArray(String s) {
553f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char[] array = new char[length];
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ai = 0;
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=2; i<s.length(); ++i) {
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i);
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == ESCAPE) {
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(++i);
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ESCAPE) {
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = c;
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
563f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    int runLength = c;
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char runValue = s.charAt(++i);
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                array[ai++] = c;
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ai != length)
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded short array");
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of bytes from a run-length encoded string.
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final byte[] RLEStringToByteArray(String s) {
583f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte[] array = new byte[length];
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean nextChar = true;
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char c = 0;
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int node = 0;
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 0;
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 2;
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int ai=0; ai<length; ) {
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // This part of the loop places the next byte into the local
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // variable 'b' each time through the loop.  It keeps the
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // current character in 'c' and uses the boolean 'nextChar'
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // to see if we've taken both bytes out of 'c' yet.
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte b;
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (nextChar) {
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(i++);
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b = (byte) (c >> 8);
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                nextChar = false;
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b = (byte) (c & 0xFF);
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                nextChar = true;
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // This part of the loop is a tiny state machine which handles
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the parsing of the run-length encoding.  This would be simpler
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if we could look ahead, but we can't, so we use 'node' to
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // move between three nodes in the state machine.
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch (node) {
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 0:
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Normal idle node
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (b == ESCAPE_BYTE) {
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = 1;
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else {
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = b;
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 1:
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We have seen one ESCAPE_BYTE; we expect either a second
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // one, or a run length and value.
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (b == ESCAPE_BYTE) {
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = ESCAPE_BYTE;
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = 0;
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else {
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    runLength = b;
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Interpret signed byte as unsigned
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (runLength < 0) runLength += 0x100;
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = 2;
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 2:
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We have seen an ESCAPE_BYTE and length byte.  We interpret
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // the next byte as the value to be repeated.
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int j=0; j<runLength; ++j) array[ai++] = b;
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = 0;
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (node != 0)
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded byte array");
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (i != s.length())
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Excess data in RLE byte array string");
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public String LINE_SEPARATOR = System.getProperty("line.separator");
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Format a String for representation in a source file.  This includes
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * breaking it into lines and escaping characters using octal notation
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * when necessary (control characters and double quotes).
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String formatForSource(String s) {
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length();) {
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append("        \"");
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int count = 11;
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (i<s.length() && count<80) {
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char c = s.charAt(i++);
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c < '\u0020' || c == '"' || c == '\\') {
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (c == '\n') {
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append("\\n");
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 2;
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (c == '\t') {
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append("\\t");
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 2;
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (c == '\r') {
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append("\\r");
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 2;
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Represent control characters, backslash and double quote
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // using octal notation; otherwise the string we form
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // won't compile, since Unicode escape sequences are
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // processed before tokenization.
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append('\\');
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(HEX_DIGIT[(c & 0007)]);
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 4;
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else if (c <= '\u007E') {
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(c);
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    count += 1;
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else {
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\u");
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0x000F)]);
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    count += 6;
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append('"');
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        '8','9','A','B','C','D','E','F'};
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Format a String for representation in a source file.  Like
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * formatForSource but does not do line breaking.
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String format1ForSource(String s) {
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append("\"");
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length();) {
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i++);
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c < '\u0020' || c == '"' || c == '\\') {
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == '\n') {
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\n");
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c == '\t') {
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\t");
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c == '\r') {
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\r");
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Represent control characters, backslash and double quote
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // using octal notation; otherwise the string we form
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // won't compile, since Unicode escape sequences are
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // processed before tokenization.
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append('\\');
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0007)]);
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (c <= '\u007E') {
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(c);
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append("\\u");
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0x000F)]);
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append('"');
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
/**
 * Escapes a string so the result contains only characters in the range
 * U+0020 to U+007F. A backslash is doubled. Every code point outside that
 * range is replaced by a backslash, then the letter 'u' and four uppercase
 * hex digits (code points up to U+FFFF) or the letter 'U' and eight
 * uppercase hex digits (supplementary code points).
 *
 * @param s the text to escape; must not be null
 * @return the escaped text
 */
public static final String escape(String s) {
    final StringBuilder out = new StringBuilder();
    int pos = 0;
    while (pos < s.length()) {
        final int cp = s.codePointAt(pos);
        pos += Character.charCount(cp);
        if (cp == '\\') {
            out.append("\\\\"); // one backslash in, two out
        } else if (cp >= ' ' && cp <= 0x007F) {
            out.append((char) cp);
        } else {
            final boolean bmp = cp <= 0xFFFF;
            out.append('\\').append(bmp ? 'u' : 'U');
            // Emit 4 or 8 nibbles, most significant first.
            for (int shift = bmp ? 12 : 28; shift >= 0; shift -= 4) {
                out.append(Character.toUpperCase(Character.forDigit((cp >> shift) & 0xF, 16)));
            }
        }
    }
    return out.toString();
}
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static private final char[] UNESCAPE_MAP = {
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*"   0x22, 0x22 */
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*'   0x27, 0x27 */
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*?   0x3F, 0x3F */
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*\   0x5C, 0x5C */
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*a*/ 0x61, 0x07,
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*b*/ 0x62, 0x08,
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*e*/ 0x65, 0x1b,
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*f*/ 0x66, 0x0c,
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*n*/ 0x6E, 0x0a,
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*r*/ 0x72, 0x0d,
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*t*/ 0x74, 0x09,
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*v*/ 0x76, 0x0b
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert an escape to a 32-bit code point value.  We attempt
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to parallel the icu4c unescapeAt() function.
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param offset16 an array containing offset to the character
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <em>after</em> the backslash.  Upon return offset16[0] will
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * be updated to point after the escape sequence.
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return character value from 0 to 10FFFF, or -1 on error.
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int unescapeAt(String s, int[] offset16) {
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c;
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int result = 0;
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int n = 0;
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int minDig = 0;
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int maxDig = 0;
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int bitsPerDigit = 4;
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int dig;
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean braces = false;
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Check that offset is in range */
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int offset = offset16[0];
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int length = s.length();
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (offset < 0 || offset >= length) {
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Fetch first UChar after '\\' */
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        c = Character.codePointAt(s, offset);
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        offset += UTF16.getCharCount(c);
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Convert hexadecimal and octal escapes */
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        switch (c) {
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 'u':
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDig = maxDig = 4;
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 'U':
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDig = maxDig = 8;
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 'x':
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDig = 1;
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++offset;
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                braces = true;
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maxDig = 8;
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maxDig = 2;
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        default:
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dig = UCharacter.digit(c, 8);
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (dig >= 0) {
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                minDig = 1;
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maxDig = 3;
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                n = 1; /* Already have first octal digit */
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                bitsPerDigit = 3;
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result = dig;
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (minDig != 0) {
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (offset < length && n < maxDig) {
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = UTF16.charAt(s, offset);
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (dig < 0) {
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result = (result << bitsPerDigit) | dig;
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                offset += UTF16.getCharCount(c);
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++n;
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n < minDig) {
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (braces) {
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c != 0x7D /*}*/) {
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++offset;
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (result < 0 || result >= 0x110000) {
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If an escape sequence specifies a lead surrogate, see
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if there is a trail surrogate after it, either as an
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // escape or as a literal.  If so, join them up into a
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // supplementary.
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (offset < length &&
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    UTF16.isLeadSurrogate((char) result)) {
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int ahead = offset+1;
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(offset); // [sic] get 16-bit code unit
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == '\\' && ahead < length) {
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int o[] = new int[] { ahead };
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c = unescapeAt(s, o);
8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ahead = o[0];
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (UTF16.isTrailSurrogate((char) c)) {
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    offset = ahead;
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result = Character.toCodePoint((char) result, (char) c);
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset16[0] = offset;
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Convert C-style escapes in table */
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == UNESCAPE_MAP[i]) {
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                offset16[0] = offset;
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return UNESCAPE_MAP[i+1];
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (c < UNESCAPE_MAP[i]) {
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Map \cX to control-X: X & 0x1F */
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c == 'c' && offset < length) {
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c = UTF16.charAt(s, offset);
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset16[0] = offset + UTF16.getCharCount(c);
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0x1F & c;
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* If no special forms are recognized, then consider
9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * the backslash to generically escape the next character. */
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        offset16[0] = offset;
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return c;
9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert all escapes in a given string using unescapeAt().
9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception IllegalArgumentException if an invalid escape is
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen.
9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String unescape(String s) {
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = new StringBuilder();
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] pos = new int[1];
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length(); ) {
9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i++);
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == '\\') {
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos[0] = i;
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int e = unescapeAt(s, pos);
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (e < 0) {
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new IllegalArgumentException("Invalid escape sequence " +
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            s.substring(i-1, Math.min(i+8, s.length())));
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.appendCodePoint(e);
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i = pos[0];
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append(c);
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf.toString();
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert all escapes in a given string using unescapeAt().
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Leave invalid escape sequences unchanged.
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String unescapeLeniently(String s) {
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = new StringBuilder();
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] pos = new int[1];
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length(); ) {
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i++);
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == '\\') {
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos[0] = i;
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int e = unescapeAt(s, pos);
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (e < 0) {
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.append(c);
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.appendCodePoint(e);
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    i = pos[0];
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append(c);
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf.toString();
9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * "0041".
9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String hex(long ch) {
9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return hex(ch, 4);
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Supplies a zero-padded hex representation of an integer (without 0x)
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public String hex(long i, int places) {
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (i == Long.MIN_VALUE) return "-8000000000000000";
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean negative = i < 0;
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (negative) {
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = -i;
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (result.length() < places) {
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result = "0000000000000000".substring(result.length(),places) + result;
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (negative) {
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return '-' + result;
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert a string to comma-separated groups of 4 hex uppercase
9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * digits.  E.g., hex('ab') => "0041,0042".
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String hex(CharSequence s) {
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return hex(s, 4, ",", true, new StringBuilder()).toString();
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert a string to separated groups of hex uppercase
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * digits.  E.g., hex('ab'...) => "0041,0042".  Append the output
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to the given Appendable.
10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (useCodePoints) {
10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int cp;
10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    cp = Character.codePointAt(s, i);
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (i != 0) {
10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        result.append(separator);
10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(hex(cp,width));
10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int i = 0; i < s.length(); ++i) {
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (i != 0) {
10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        result.append(separator);
10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(hex(s.charAt(i),width));
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String hex(byte[] o, int start, int end, String separator) {
10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder();
10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //int ch;
10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = start; i < end; ++i) {
10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller          if (i != 0) result.append(separator);
10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller          result.append(hex(o[i]));
10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.toString();
10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert a string to comma-separated groups of 4 hex uppercase
10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * digits.  E.g., hex('ab') => "0041,0042".
10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <S extends CharSequence> String hex(S s, int width, S separator) {
10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return hex(s, width, separator, true, new StringBuilder()).toString();
1051f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    }
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Split a string into pieces based on the given divider character
10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the string to split
10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param divider the character on which to split.  Occurrences of
10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * this character are not included in the output
10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param output an array to receive the substrings between
10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * instances of divider.  It must be large enough on entry to
10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * accomodate all output.  Adjacent instances of the divider
10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character will place empty strings into output.  Before
10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returning, output is padded out with empty strings.
10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void split(String s, char divider, String[] output) {
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int last = 0;
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int current = 0;
10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = 0; i < s.length(); ++i) {
10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s.charAt(i) == divider) {
10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                output[current++] = s.substring(last,i);
10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                last = i+1;
10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        output[current++] = s.substring(last,i);
10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (current < output.length) {
10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            output[current++] = "";
10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Split a string into pieces based on the given divider character
10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the string to split
10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param divider the character on which to split.  Occurrences of
10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * this character are not included in the output
10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return output an array to receive the substrings between
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * instances of divider. Adjacent instances of the divider
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character will place empty strings into output.
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String[] split(String s, char divider) {
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int last = 0;
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ArrayList<String> output = new ArrayList<String>();
10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = 0; i < s.length(); ++i) {
10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s.charAt(i) == divider) {
10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                output.add(s.substring(last,i));
10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                last = i+1;
10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        output.add( s.substring(last,i));
11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return output.toArray(new String[output.size()]);
11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Look up a given string in a string array.  Returns the index at
11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * which the first occurrence of the string was found in the
11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * array, or -1 if it was not found.
11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source the string to search for
11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param target the array of zero or more strings in which to
11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * look for source
11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the index of target at which source first occurs, or -1
11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * if not found
11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int lookup(String source, String[] target) {
11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < target.length; ++i) {
11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source.equals(target[i])) return i;
11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a single non-whitespace character 'ch', optionally
11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * preceded by whitespace.
11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param id the string to be parsed
11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * offset of the first character to be parsed.  On output, pos[0]
11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * is the index after the last parsed character.  If the parse
11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * fails, pos[0] will be unchanged.
11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ch the non-whitespace character to be parsed.
11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if 'ch' is seen preceded by zero or more
11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * whitespace characters.
11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static boolean parseChar(String id, int[] pos, char ch) {
11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int start = pos[0];
11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pos[0] = PatternProps.skipWhiteSpace(id, pos[0]);
11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (pos[0] == id.length() ||
11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                id.charAt(pos[0]) != ch) {
11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos[0] = start;
11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
11392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ++pos[0];
11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a pattern string starting at offset pos.  Keywords are
11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * matched case-insensitively.  Spaces may be skipped and may be
11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * optional or required.  Integer values may be parsed, and if
11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * they are, they will be returned in the given array.  If
11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * successful, the offset of the next non-space character is
11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returned.  On failure, -1 is returned.
11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern must only contain lowercase characters, which
11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will match their uppercase equivalents as well.  A space
11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character matches one or more required spaces.  A '~' character
11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * matches zero or more optional spaces.  A '#' character matches
11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * an integer and stores it in parsedInts, which the caller must
11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ensure has enough capacity.
11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param parsedInts array to receive parsed integers.  Caller
11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * must ensure that parsedInts.length is >= the number of '#'
11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * signs in 'pattern'.
11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the position after the last character parsed, or -1 if
11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the parse failed
11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @SuppressWarnings("fallthrough")
11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parsePattern(String rule, int pos, int limit,
11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String pattern, int[] parsedInts) {
11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO Update this to handle surrogates
11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] p = new int[1];
11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int intCount = 0; // number of integers parsed
11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<pattern.length(); ++i) {
11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char cpat = pattern.charAt(i);
11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c;
11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch (cpat) {
11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case ' ':
11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (pos >= limit) {
11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = rule.charAt(pos++);
11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (!PatternProps.isWhiteSpace(c)) {
11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // FALL THROUGH to skipWhitespace
11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '~':
11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos = PatternProps.skipWhiteSpace(rule, pos);
11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '#':
11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                p[0] = pos;
11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                parsedInts[intCount++] = parseInteger(rule, p, limit);
11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (p[0] == pos) {
11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Syntax error; failed to parse integer
11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos = p[0];
11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            default:
11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (pos >= limit) {
11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c != cpat) {
12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return pos;
12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a pattern string within the given Replaceable and a parsing
12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pattern.  Characters are matched literally and case-sensitively
12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * except for the following special characters:
12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ~  zero or more Pattern_White_Space chars
12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If end of pattern is reached with all matches along the way,
12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pos is advanced to the first unparsed index and returned.
12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Otherwise -1 is returned.
12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pat pattern that controls parsing
12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text text to be parsed, starting at index
12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param index offset to first character to parse
12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param limit offset after last character to parse
12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index after last parsed character, or -1 on parse failure.
12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parsePattern(String pat,
12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Replaceable text,
12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index,
12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int limit) {
12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ipat = 0;
12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // empty pattern matches immediately
12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ipat == pat.length()) {
12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return index;
12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cpat = Character.codePointAt(pat, ipat);
12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (index < limit) {
12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = text.char32At(index);
12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // parse \s*
12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (cpat == '~') {
12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (PatternProps.isWhiteSpace(c)) {
12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    index += UTF16.getCharCount(c);
12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (++ipat == pat.length()) {
12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return index; // success; c unparsed
12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // fall thru; process c again with next cpat
12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // parse literal
12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (c == cpat) {
12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int n = UTF16.getCharCount(c);
12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index += n;
12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ipat += n;
12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (ipat == pat.length()) {
12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return index; // success; c parsed
12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // fall thru; get next cpat
12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // match failure of literal
12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cpat = UTF16.charAt(pat, ipat);
12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1; // text ended before end of pat
12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse an integer at pos, either of the form \d+ or of the form
12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or octal format.
12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUTPUT parameter.  On input, the first
12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character to parse.  On output, the character after the last
12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * parsed character.
12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parseInteger(String rule, int[] pos, int limit) {
12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int count = 0;
12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int value = 0;
12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p = pos[0];
12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int radix = 10;
12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (rule.regionMatches(true, p, "0x", 0, 2)) {
12902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p += 2;
12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            radix = 16;
12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (p < limit && rule.charAt(p) == '0') {
12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p++;
12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            count = 1;
12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            radix = 8;
12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (p < limit) {
12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int d = UCharacter.digit(rule.charAt(p++), radix);
13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (d < 0) {
13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --p;
13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++count;
13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int v = (value * radix) + d;
13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (v <= value) {
13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If there are too many input digits, at some point
13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // the value will go negative, e.g., if we have seen
13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // "0x8000000" already and there is another '0', when
13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // we parse the next 0 the value will go negative.
13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            value = v;
13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (count > 0) {
13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos[0] = p;
13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return value;
13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a Unicode identifier from the given string at the given
13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * position.  Return the identifier, or null if there is no
13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * identifier.
13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param str the string to parse
13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * first character to examine.  It must be less than str.length(),
13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and it must not point to a whitespace character.  That is, must
13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * have pos[0] < str.length().  On
13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * OUTPUT, the position after the last parsed character.
13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the Unicode identifier, or null if there is no valid
13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * identifier at pos[0].
13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String parseUnicodeIdentifier(String str, int[] pos) {
13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(pos[0] < str.length());
13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = new StringBuilder();
13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p = pos[0];
13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (p < str.length()) {
13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int ch = Character.codePointAt(str, p);
13402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (buf.length() == 0) {
13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (UCharacter.isUnicodeIdentifierStart(ch)) {
13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.appendCodePoint(ch);
13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return null;
13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (UCharacter.isUnicodeIdentifierPart(ch)) {
13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.appendCodePoint(ch);
13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p += UTF16.getCharCount(ch);
13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pos[0] = p;
13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf.toString();
13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final char DIGITS[] = {
13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        'U', 'V', 'W', 'X', 'Y', 'Z'
13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append the digits of a positive integer to the given
13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <code>Appendable</code> in the given radix. This is
13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * done recursively since it is easiest to generate the low-
13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * order digit first, but it must be appended last.
13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param result is the <code>Appendable</code> to append to
13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param n is the positive integer
13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param radix is the radix, from 2 to 36 inclusive
13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minDigits is the minimum number of digits to append.
13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static <T extends Appendable> void recursiveAppendNumber(T result, int n,
13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int radix, int minDigits)
13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int digit = n % radix;
13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n >= radix || minDigits > 1) {
13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append(DIGITS[digit]);
13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append a number to the given Appendable in the given radix.
13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * radices 11 through 36.
13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param result the digits of the number are appended here
13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param n the number to be converted to digits; may be negative.
13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If negative, a '-' is prepended to the digits.
13992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param radix a radix from 2 to 36 inclusive.
14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minDigits the minimum number of digits, not including
14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * any '-', to produce.  Values less than 2 have no effect.  One
14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * digit is always emitted regardless of this parameter.
14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to result
14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Appendable> T appendNumber(T result, int n,
14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int radix, int minDigits)
14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (radix < 2 || radix > 36) {
14102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException("Illegal radix " + radix);
14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int abs = n;
14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n < 0) {
14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                abs = -n;
14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append("-");
14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            recursiveAppendNumber(result, abs, radix, minDigits);
14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse an unsigned 31-bit integer at the given offset.  Use
14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UCharacter.digit() to parse individual characters into digits.
14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text the text to be parsed
14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * offset within text at which to start parsing; it should point
14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to a valid digit.  On exit, pos[0] is the offset after the last
14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * parsed character.  If the parse failed, it will be unchanged on
14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * exit.  Must be >= 0 on entry.
14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param radix the radix in which to parse; must be >= 2 and <=
14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 36.
14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a non-negative parsed number, or -1 upon parse failure.
14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse fails if there are no digits, that is, if pos[0] does not
14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * point to a valid digit on entry, or if the number to be parsed
14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * does not fit into a 31-bit unsigned integer.
14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parseNumber(String text, int[] pos, int radix) {
14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(pos[0] >= 0);
14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(radix >= 2);
14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(radix <= 36);
14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int n = 0;
14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p = pos[0];
14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (p < text.length()) {
14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int ch = Character.codePointAt(text, p);
14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int d = UCharacter.digit(ch, radix);
14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (d < 0) {
14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = radix*n + d;
14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // ASSUME that when a 32-bit integer overflows it becomes
14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // negative.  E.g., 214748364 * 10 + 8 => negative value.
14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n < 0) {
14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++p;
14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (p == pos[0]) {
14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pos[0] = p;
14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return n;
14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return true if the character is NOT printable ASCII.  The tab,
14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * newline and linefeed characters are considered unprintable.
14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static boolean isUnprintable(int c) {
14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //0x20 = 32 and 0x7E = 126
14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !(c >= 0x20 && c <= 0x7E);
14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Escape unprintable characters using <backslash>uxxxx notation
14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * above.  If the character is printable ASCII, then do nothing
14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and return FALSE.  Otherwise, append the escaped notation and
14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * return TRUE.
14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (isUnprintable(c)) {
14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append('\\');
14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if ((c & ~0xFFFF) != 0) {
14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append('U');
14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>28)]);
14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>24)]);
14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>20)]);
14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>16)]);
14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append('u');
15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&(c>>12)]);
15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&(c>>8)]);
15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&(c>>4)]);
15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&c]);
15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the index of the first character in a set, ignoring quoted text.
15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * found by a search for "h".  Unlike String.indexOf(), this method searches
15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * not for a single character, but for any character of the string
15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <code>setOfChars</code>.
15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text text to be searched
15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start the beginning index, inclusive; <code>0 <= start
15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <= limit</code>.
15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param limit the ending index, exclusive; <code>start <= limit
15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <= text.length()</code>.
15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param setOfChars string with one or more distinct characters
15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return Offset of the first character in <code>setOfChars</code>
15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * found, or -1 if not found.
15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see String#indexOf
15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int quotedIndexOf(String text, int start, int limit,
15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String setOfChars) {
15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=start; i<limit; ++i) {
15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = text.charAt(i);
15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == BACKSLASH) {
15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (c == APOSTROPHE) {
15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (++i < limit
15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        && text.charAt(i) != APOSTROPHE) {}
15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (setOfChars.indexOf(c) >= 0) {
15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append a character to a rule that is being built up.  To flush
15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the quoteBuf to rule, make one final call with isLiteral == true.
15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If there is no final character, pass in (int)-1 as c.
15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param rule the string to append the character to
15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c the character to append, or (int)-1 if none.
15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param isLiteral if true, then the given character should not be
15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * quoted or escaped.  Usually this means it is a syntactic element
15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * such as > or $
15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param escapeUnprintable if true, then unprintable characters
15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * should be escaped using escapeUnprintable().  These escapes will
15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * appear outside of quotes.
15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param quoteBuf a buffer which is used to build up quoted
15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * substrings.  The caller should initially supply an empty buffer,
15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and thereafter should not modify the buffer.  The buffer should be
15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * cleared out by, at the end, calling this method with a literal
15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character (which may be -1).
15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void appendToRule(StringBuffer rule,
15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c,
15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean isLiteral,
15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable,
15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuffer quoteBuf) {
15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If we are escaping unprintables, then escape them outside
15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // quotes.  \\u and \\U are not recognized within quotes.  The same
15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // logic applies to literals, but literals are never escaped.
15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (isLiteral ||
15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (escapeUnprintable && Utility.isUnprintable(c))) {
15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (quoteBuf.length() > 0) {
15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We prefer backslash APOSTROPHE to double APOSTROPHE
15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // (more readable, less similar to ") so if there are
15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // double APOSTROPHEs at the ends, we pull them outside
15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // of the quote.
15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If the first thing in the quoteBuf is APOSTROPHE
15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // (doubled) then pull it out.
15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (quoteBuf.length() >= 2 &&
15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(0) == APOSTROPHE &&
15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(1) == APOSTROPHE) {
15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(BACKSLASH).append(APOSTROPHE);
15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    quoteBuf.delete(0, 2);
15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If the last thing in the quoteBuf is APOSTROPHE
15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // (doubled) then remove and count it and add it after.
15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int trailingCount = 0;
15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (quoteBuf.length() >= 2 &&
15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    quoteBuf.setLength(quoteBuf.length()-2);
15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++trailingCount;
15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (quoteBuf.length() > 0) {
15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(APOSTROPHE);
15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(quoteBuf);
16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(APOSTROPHE);
16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    quoteBuf.setLength(0);
16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (trailingCount-- > 0) {
16042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(BACKSLASH).append(APOSTROPHE);
16052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
16062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c != -1) {
16082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /* Since spaces are ignored during parsing, they are
16092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * emitted only for readability.  We emit one here
16102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * only if there isn't already one at the end of the
16112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * rule.
16122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
16132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ' ') {
16142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int len = rule.length();
16152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (len > 0 && rule.charAt(len-1) != ' ') {
16162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        rule.append(' ');
16172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
16182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
16192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.appendCodePoint(c);
16202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
16212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Escape ' and '\' and don't begin a quote just for them
16252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if (quoteBuf.length() == 0 &&
16262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (c == APOSTROPHE || c == BACKSLASH)) {
16272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rule.append(BACKSLASH).append((char)c);
16282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Specials (printable ascii that isn't [0-9a-zA-Z]) and
16312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // whitespace need quoting.  Also append stuff to quotes if we are
16322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // building up a quoted substring already.
16332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if (quoteBuf.length() > 0 ||
16342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (c >= 0x0021 && c <= 0x007E &&
16352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
16372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
16382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                PatternProps.isWhiteSpace(c)) {
16392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            quoteBuf.appendCodePoint(c);
16402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Double ' within a quote
16412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == APOSTROPHE) {
16422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                quoteBuf.append((char)c);
16432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Otherwise just append
16472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
16482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rule.appendCodePoint(c);
16492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append the given string to the rule.  Calls the single-character
16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * version of appendToRule for each character.
16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void appendToRule(StringBuffer rule,
16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String text,
16582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean isLiteral,
16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable,
16602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuffer quoteBuf) {
16612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<text.length(); ++i) {
16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Okay to process in 16-bit code units here
16632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
16642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Given a matcher reference, which may be null, append its
16692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pattern as a literal to the given rule.
16702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void appendToRule(StringBuffer rule,
16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeMatcher matcher,
16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable,
16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuffer quoteBuf) {
16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (matcher != null) {
16762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendToRule(rule, matcher.toPattern(escapeUnprintable),
16772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    true, escapeUnprintable, quoteBuf);
16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compares 2 unsigned integers
16832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source 32 bit unsigned integer
16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param target 32 bit unsigned integer
16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return 0 if equals, 1 if source is greater than target and -1
16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         otherwise
16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int compareUnsigned(int source, int target)
16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        source += MAGIC_UNSIGNED;
16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        target += MAGIC_UNSIGNED;
16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source < target) {
16932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
1694f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        }
16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if (source > target) {
16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 1;
16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return 0;
16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Find the highest bit in a positive integer. This is done
17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * by doing a binary search through the bits.
17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param n is the integer
17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the bit number of the highest bit, with 0 being
17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the low order bit, or -1 if <code>n</code> is not positive
17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final byte highBit(int n)
17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
17122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n <= 0) {
17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte bit = 0;
17172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 16) {
17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 16;
17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 16;
17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 8) {
17242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 8;
17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 8;
17262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 4) {
17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 4;
17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 4;
17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 2) {
17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 2;
17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 2;
17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 1) {
17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 1;
17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 1;
17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return bit;
17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility method to take a int[] containing codepoints and return
1747f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * a string representation with code units.
17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String valueOf(int[]source){
17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: Investigate why this method is not on UTF16 class
17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder(source.length);
17522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int i=0; i<source.length; i++){
17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.appendCodePoint(source[i]);
17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.toString();
17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility to duplicate a string count times
17612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s String to be duplicated.
17622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param count Number of times to duplicate a string.
17632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String repeat(String s, int count) {
17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (count <= 0) return "";
17662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (count == 1) return s;
17672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder();
17682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < count; ++i) {
17692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append(s);
17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.toString();
17722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1773f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String[] splitString(String src, String target) {
17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src.split("\\Q" + target + "\\E");
17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Split the string at runs of ascii whitespace characters.
17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String[] splitWhitespace(String src) {
17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src.split("\\s+");
17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a list of hex numbers and return a string
17872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param string String of hex numbers.
17882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minLength Minimal length.
17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param separator Separator.
17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A string from hex numbers.
17912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String fromHex(String string, int minLength, String separator) {
17932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+"));
17942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1795f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
17962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a list of hex numbers and return a string
17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param string String of hex numbers.
17992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minLength Minimal length.
18002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param separator Separator.
18012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A string from hex numbers.
18022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String fromHex(String string, int minLength, Pattern separator) {
18042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String[] parts = separator.split(string);
18062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (String part : parts) {
18072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (part.length() < minLength) {
18082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException("code point too short: " + part);
18092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int cp = Integer.parseInt(part, 16);
18112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.appendCodePoint(cp);
18122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
18142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1815495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
1816495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    /**
1817495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * This implementation is equivalent to Java 7+ Objects#equals(Object a, Object b)
1818495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     *
1819495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @param a an object
1820495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @param b an object to be compared with a for equality
1821495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @return true if the arguments are equal to each other and false otherwise
1822495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     */
1823495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    public static boolean equals(Object a, Object b) {
1824495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        return (a == b)
1825495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert                || (a != null && b != null && a.equals(b));
1826495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    }
1827495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
1828495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    /**
1829495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * This implementation is equivalent to Java 7+ Objects#hash(Object... values)
1830495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @param values the values to be hashed
1831495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @return a hash value of the sequence of input values
1832495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     */
1833495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    public static int hash(Object... values) {
1834495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        return Arrays.hashCode(values);
1835495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    }
1836495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
1837495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    /**
1838495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * This implementation is equivalent to Java 7+ Objects#hashCode(Object o)
1839495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @param o an object
1840495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @return a hash value of a non-null argument and 0 for null argument
1841495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     */
1842495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    public static int hashCode(Object o) {
1843495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        return o == null ? 0 : o.hashCode();
1844495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    }
1845495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert
1846495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    /**
1847495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * This implementation is equivalent to Java 7+ Objects#toString(Object o)
1848495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @param o an object
1849495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * @return the result of calling toStirng for a non-null argument and "null" for a
1850495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     * null argument
1851495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert     */
1852495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    public static String toString(Object o) {
1853495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert        return o == null ? "null" : o.toString();
1854495cb271e305cfb399d463f32210a371198f0abfFredrik Roubert    }
18552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
1856