12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 1996-2015, International Business Machines Corporation and    *
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved.                                                *
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl;
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Locale;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.regex.Pattern;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Replaceable;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeMatcher;
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
221537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/**
231537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android
24836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class Utility {
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
// The single-quote character (U+0027).
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char APOSTROPHE = '\'';
// The backslash character (U+005C).
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char BACKSLASH  = '\\';
// An int with only the top (sign) bit set; where it is used is not visible in this part of the file.
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int MAGIC_UNSIGNED = 0x80000000;
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s.
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(Object[] source, Object target) {
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof Object[])) return false;
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Object[] targ = (Object[]) target;
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two int[]s
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(int[] source, Object target) {
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof int[])) return false;
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] targ = (int[]) target;
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two double[]s
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(double[] source, Object target) {
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof double[])) return false;
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        double[] targ = (double[]) target;
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(byte[] source, Object target) {
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!(target instanceof byte[])) return false;
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte[] targ = (byte[]) target;
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (source.length == targ.length
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                && arrayRegionMatches(source, 0, targ, 0, source.length));
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayEquals(Object source, Object target) {
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source == null) return (target == null);
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // for some reason, the correct arrayEquals is not being called
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // so do it by hand for now.
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof Object[])
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((Object[]) source,target));
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof int[])
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((int[]) source,target));
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof double[])
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((double[]) source, target));
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source instanceof byte[])
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return(arrayEquals((byte[]) source,target));
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return source.equals(target);
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System.
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Object[] target, int targetStart,
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!arrayEquals(source[i],target[i + delta]))
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two Object[]s
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System.
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char[] target, int targetStart,
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i]!=target[i + delta])
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
131f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    /**
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two int[]s.
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(int[] source, int sourceStart,
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] target, int targetStart,
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i] != target[i + delta])
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility to compare two arrays of doubles.
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param len the length to compare.
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The start indices and start+len must be valid.
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Ought to be in System
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(double[] source, int sourceStart,
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            double[] target, int targetStart,
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int len)
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i] != target[i + delta])
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte[] target, int targetStart, int len){
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceEnd = sourceStart + len;
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int delta = targetStart - sourceStart;
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = sourceStart; i < sourceEnd; i++) {
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source[i] != target[i + delta])
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
180f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * Trivial reference equality.
181f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * This method should help document that we really want == not equals(),
182f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * and to have a single place to suppress warnings from static analysis tools.
183f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     */
184f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    public static final boolean sameObjects(Object a, Object b) {
185f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return a == b;
186f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    }
187f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
188f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    /**
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility. Does null checks on objects, then calls equals.
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public final static boolean objectEquals(Object a, Object b) {
192f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return a == null ?
193f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                b == null ? true : false :
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    b == null ? false : a.equals(b);
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
196f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility. Does null checks on objects, then calls compare.
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Comparable<T>> int checkCompare(T a, T b) {
201f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return a == null ?
202f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                b == null ? 0 : -1 :
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    b == null ? 1 : a.compareTo(b);
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convenience utility. Does null checks on object, then calls hashCode.
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int checkHash(Object a) {
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return a == null ? 0 : a.hashCode();
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
212f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE character is used during run-length encoding.  It signals
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a run of identical chars.
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final char ESCAPE = '\uA5A5';
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE_BYTE character is used during run-length encoding.  It signals
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a run of identical bytes.
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final byte ESCAPE_BYTE = (byte)0xA5;
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing an int array.  Use run-length encoding.
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A character represents itself, unless it is the ESCAPE character.  Then
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following notations are possible:
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE ESCAPE   ESCAPE literal
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE n c      n instances of character c
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE, we represent this as:
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   c ESCAPE n-1 c
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE value is chosen so as not to collide with commonly
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(int[] a) {
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        appendInt(buffer, a.length);
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runValue = a[0];
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int s = a[i];
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s == runValue && runLength < 0xFFFF) {
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++runLength;
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, runValue, runLength);
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = s;
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, runValue, runLength);
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing a short array.  Use run-length encoding.
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A character represents itself, unless it is the ESCAPE character.  Then
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following notations are possible:
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE ESCAPE   ESCAPE literal
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE n c      n instances of character c
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE, we represent this as:
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   c ESCAPE n-1 c
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE value is chosen so as not to collide with commonly
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(short[] a) {
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) (a.length >> 16));
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) a.length);
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        short runValue = a[0];
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            short s = a[i];
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s == runValue && runLength < 0xFFFF) ++runLength;
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, runValue, runLength);
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = s;
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, runValue, runLength);
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing a char array.  Use run-length encoding.
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A character represents itself, unless it is the ESCAPE character.  Then
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following notations are possible:
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE ESCAPE   ESCAPE literal
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE n c      n instances of character c
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE, we represent this as:
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   c ESCAPE n-1 c
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE value is chosen so as not to collide with commonly
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(char[] a) {
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) (a.length >> 16));
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) a.length);
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char runValue = a[0];
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char s = a[i];
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s == runValue && runLength < 0xFFFF) ++runLength;
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, (short)runValue, runLength);
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = s;
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, (short)runValue, runLength);
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a string representing a byte array.  Use run-length encoding.
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Two bytes are packed into a single char, with a single extra zero byte at
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the end if needed.  A byte represents itself, unless it is the
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ESCAPE_BYTE.  Then the following notations are possible:
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   ESCAPE_BYTE n b           n instances of byte b
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   b ESCAPE_BYTE n-1 b
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The ESCAPE_BYTE value is chosen so as not to collide with commonly
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen values.
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String arrayToRLEString(byte[] a) {
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) (a.length >> 16));
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append((char) a.length);
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte runValue = a[0];
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 1;
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte[] state = new byte[2];
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=1; i<a.length; ++i) {
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte b = a[i];
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (b == runValue && runLength < 0xFF) ++runLength;
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                encodeRun(buffer, runValue, runLength, state);
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runValue = b;
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                runLength = 1;
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        encodeRun(buffer, runValue, runLength, state);
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We must save the final byte, if there is one, by padding
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // an extra zero.
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Encode a run, possibly a degenerate run (of < 4 values).
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param length The length of the run; must be > 0 && <= 0xFFFF.
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) {
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (length < 4) {
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int j=0; j<length; ++j) {
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (value == ESCAPE) {
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    appendInt(buffer, value);
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendInt(buffer, value);
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
376f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            if (length == ESCAPE) {
377f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                if (value == ESCAPE) {
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    appendInt(buffer, ESCAPE);
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendInt(buffer, value);
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --length;
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendInt(buffer, ESCAPE);
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendInt(buffer, length);
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendInt(buffer, value); // Don't need to escape this value
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void appendInt(T buffer, int value) {
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append((char)(value >>> 16));
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append((char)(value & 0xFFFF));
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Encode a run, possibly a degenerate run (of < 4 values).
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param length The length of the run; must be > 0 && <= 0xFFFF.
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) {
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
404f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            char valueChar = (char) value;
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (length < 4) {
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int j=0; j<length; ++j) {
407f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    if (valueChar == ESCAPE) {
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(ESCAPE);
409f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    }
410f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    buffer.append(valueChar);
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
414f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                if (length == ESCAPE) {
415f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    if (valueChar == ESCAPE) {
416f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                        buffer.append(ESCAPE);
417f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    }
418f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    buffer.append(valueChar);
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    --length;
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(ESCAPE);
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append((char) length);
423f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                buffer.append(valueChar); // Don't need to escape this value
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Encode a run, possibly a degenerate run (of < 4 values).
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param length The length of the run; must be > 0 && <= 0xFF.
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length,
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte[] state) {
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (length < 4) {
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int j=0; j<length; ++j) {
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendEncodedByte(buffer, value, state);
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
443f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            if ((byte)length == ESCAPE_BYTE) {
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appendEncodedByte(buffer, value, state);
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --length;
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendEncodedByte(buffer, ESCAPE_BYTE, state);
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendEncodedByte(buffer, (byte)length, state);
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendEncodedByte(buffer, value, state); // Don't need to escape this value
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append a byte to the given Appendable, packing two bytes into each
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character.  The state parameter maintains intermediary data between
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * calls.
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param state A two-element array, with state[0] == 0 if this is the
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * first byte of a pair, or state[0] != 0 if this is the second byte
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of a pair, in which case state[1] is the first byte.
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value,
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte[] state) {
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (state[0] != 0) {
466f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                char c = (char) ((state[1] << 8) | ((value) & 0xFF));
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(c);
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state[0] = 0;
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state[0] = 1;
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state[1] = value;
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of ints from a run-length encoded string.
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final int[] RLEStringToIntArray(String s) {
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int length = getInt(s, 0);
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] array = new int[length];
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ai = 0, i = 1;
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int maxI = s.length() / 2;
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (ai < length && i < maxI) {
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = getInt(s, i++);
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == ESCAPE) {
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = getInt(s, i++);
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ESCAPE) {
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = c;
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int runLength = c;
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int runValue = getInt(s, i++);
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int j=0; j<runLength; ++j) {
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        array[ai++] = runValue;
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                array[ai++] = c;
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ai != length || i != maxI) {
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded int array");
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final int getInt(String s, int i) {
515f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        return ((s.charAt(2*i)) << 16) | s.charAt(2*i+1);
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of shorts from a run-length encoded string.
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final short[] RLEStringToShortArray(String s) {
522f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        short[] array = new short[length];
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ai = 0;
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=2; i<s.length(); ++i) {
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i);
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == ESCAPE) {
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(++i);
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ESCAPE) {
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = (short) c;
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
532f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    int runLength = c;
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    short runValue = (short) s.charAt(++i);
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                array[ai++] = (short) c;
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ai != length)
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded short array");
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of chars from a run-length encoded string.
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final char[] RLEStringToCharArray(String s) {
552f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char[] array = new char[length];
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ai = 0;
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=2; i<s.length(); ++i) {
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i);
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == ESCAPE) {
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(++i);
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ESCAPE) {
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = c;
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
562f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    int runLength = c;
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char runValue = s.charAt(++i);
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                array[ai++] = c;
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ai != length)
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded short array");
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct an array of bytes from a run-length encoded string.
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final byte[] RLEStringToByteArray(String s) {
582f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte[] array = new byte[length];
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean nextChar = true;
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        char c = 0;
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int node = 0;
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int runLength = 0;
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = 2;
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int ai=0; ai<length; ) {
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // This part of the loop places the next byte into the local
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // variable 'b' each time through the loop.  It keeps the
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // current character in 'c' and uses the boolean 'nextChar'
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // to see if we've taken both bytes out of 'c' yet.
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            byte b;
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (nextChar) {
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(i++);
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b = (byte) (c >> 8);
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                nextChar = false;
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b = (byte) (c & 0xFF);
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                nextChar = true;
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // This part of the loop is a tiny state machine which handles
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the parsing of the run-length encoding.  This would be simpler
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if we could look ahead, but we can't, so we use 'node' to
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // move between three nodes in the state machine.
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch (node) {
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 0:
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Normal idle node
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (b == ESCAPE_BYTE) {
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = 1;
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else {
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = b;
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 1:
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We have seen one ESCAPE_BYTE; we expect either a second
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // one, or a run length and value.
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (b == ESCAPE_BYTE) {
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    array[ai++] = ESCAPE_BYTE;
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = 0;
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else {
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    runLength = b;
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Interpret signed byte as unsigned
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (runLength < 0) runLength += 0x100;
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = 2;
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case 2:
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We have seen an ESCAPE_BYTE and length byte.  We interpret
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // the next byte as the value to be repeated.
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int j=0; j<runLength; ++j) array[ai++] = b;
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = 0;
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (node != 0)
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Bad run-length encoded byte array");
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (i != s.length())
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalStateException("Excess data in RLE byte array string");
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return array;
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public String LINE_SEPARATOR = System.getProperty("line.separator");
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Format a String for representation in a source file.  This includes
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * breaking it into lines and escaping characters using octal notation
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * when necessary (control characters and double quotes).
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String formatForSource(String s) {
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length();) {
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append("        \"");
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int count = 11;
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (i<s.length() && count<80) {
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char c = s.charAt(i++);
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c < '\u0020' || c == '"' || c == '\\') {
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (c == '\n') {
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append("\\n");
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 2;
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (c == '\t') {
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append("\\t");
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 2;
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if (c == '\r') {
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append("\\r");
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 2;
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Represent control characters, backslash and double quote
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // using octal notation; otherwise the string we form
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // won't compile, since Unicode escape sequences are
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // processed before tokenization.
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append('\\');
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append(HEX_DIGIT[(c & 0007)]);
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        count += 4;
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else if (c <= '\u007E') {
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(c);
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    count += 1;
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                else {
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\u");
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0x000F)]);
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    count += 6;
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append('"');
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        '8','9','A','B','C','D','E','F'};
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Format a String for representation in a source file.  Like
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * formatForSource but does not do line breaking.
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public final String format1ForSource(String s) {
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append("\"");
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length();) {
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i++);
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c < '\u0020' || c == '"' || c == '\\') {
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == '\n') {
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\n");
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c == '\t') {
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\t");
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (c == '\r') {
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append("\\r");
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Represent control characters, backslash and double quote
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // using octal notation; otherwise the string we form
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // won't compile, since Unicode escape sequences are
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // processed before tokenization.
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append('\\');
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(HEX_DIGIT[(c & 0007)]);
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (c <= '\u007E') {
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(c);
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append("\\u");
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(HEX_DIGIT[(c & 0x000F)]);
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append('"');
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert characters outside the range U+0020 to U+007F to
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Unicode escapes, and convert backslash to a double backslash.
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final String escape(String s) {
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = new StringBuilder();
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length(); ) {
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = Character.codePointAt(s, i);
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i += UTF16.getCharCount(c);
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c >= ' ' && c <= 0x007F) {
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == '\\') {
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.append("\\\\"); // That is, "\\"
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.append((char)c);
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                boolean four = c <= 0xFFFF;
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append(four ? "\\u" : "\\U");
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append(hex(c, four ? 4 : 8));
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf.toString();
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /*
     * Lookup table for the single-letter C-style escapes recognized by
     * unescapeAt(): a flat list of (escape letter, resulting character)
     * pairs, so even indexes hold the letter and odd indexes its value.
     *
     * This map must be in ASCENDING ORDER OF THE ESCAPE CODE: the lookup
     * loop in unescapeAt() gives up as soon as it meets an entry greater
     * than the character it is searching for.
     */
    static private final char[] UNESCAPE_MAP = {
        // The first four entries are deliberately commented out: each maps
        // a character to itself, which is what unescapeAt() already returns
        // for any character not found in this table.
        /*"   0x22, 0x22 */
        /*'   0x27, 0x27 */
        /*?   0x3F, 0x3F */
        /*\   0x5C, 0x5C */
        /*a*/ 0x61, 0x07,
        /*b*/ 0x62, 0x08,
        /*e*/ 0x65, 0x1b,
        /*f*/ 0x66, 0x0c,
        /*n*/ 0x6E, 0x0a,
        /*r*/ 0x72, 0x0d,
        /*t*/ 0x74, 0x09,
        /*v*/ 0x76, 0x0b
    };
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert an escape to a 32-bit code point value.  We attempt
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to parallel the icu4c unescapeAt() function.
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param offset16 an array containing offset to the character
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <em>after</em> the backslash.  Upon return offset16[0] will
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * be updated to point after the escape sequence.
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return character value from 0 to 10FFFF, or -1 on error.
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int unescapeAt(String s, int[] offset16) {
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c;
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int result = 0;
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int n = 0;
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int minDig = 0;
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int maxDig = 0;
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int bitsPerDigit = 4;
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int dig;
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean braces = false;
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Check that offset is in range */
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int offset = offset16[0];
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int length = s.length();
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (offset < 0 || offset >= length) {
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Fetch first UChar after '\\' */
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        c = Character.codePointAt(s, offset);
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        offset += UTF16.getCharCount(c);
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Convert hexadecimal and octal escapes */
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        switch (c) {
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 'u':
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDig = maxDig = 4;
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 'U':
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDig = maxDig = 8;
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case 'x':
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDig = 1;
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++offset;
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                braces = true;
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maxDig = 8;
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maxDig = 2;
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        default:
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dig = UCharacter.digit(c, 8);
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (dig >= 0) {
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                minDig = 1;
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maxDig = 3;
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                n = 1; /* Already have first octal digit */
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                bitsPerDigit = 3;
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result = dig;
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (minDig != 0) {
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (offset < length && n < maxDig) {
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = UTF16.charAt(s, offset);
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (dig < 0) {
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result = (result << bitsPerDigit) | dig;
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                offset += UTF16.getCharCount(c);
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++n;
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n < minDig) {
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (braces) {
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c != 0x7D /*}*/) {
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++offset;
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (result < 0 || result >= 0x110000) {
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If an escape sequence specifies a lead surrogate, see
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if there is a trail surrogate after it, either as an
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // escape or as a literal.  If so, join them up into a
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // supplementary.
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (offset < length &&
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    UTF16.isLeadSurrogate((char) result)) {
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int ahead = offset+1;
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = s.charAt(offset); // [sic] get 16-bit code unit
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == '\\' && ahead < length) {
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int o[] = new int[] { ahead };
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c = unescapeAt(s, o);
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ahead = o[0];
8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (UTF16.isTrailSurrogate((char) c)) {
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    offset = ahead;
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result = Character.toCodePoint((char) result, (char) c);
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset16[0] = offset;
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Convert C-style escapes in table */
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == UNESCAPE_MAP[i]) {
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                offset16[0] = offset;
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return UNESCAPE_MAP[i+1];
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (c < UNESCAPE_MAP[i]) {
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Map \cX to control-X: X & 0x1F */
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c == 'c' && offset < length) {
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c = UTF16.charAt(s, offset);
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset16[0] = offset + UTF16.getCharCount(c);
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0x1F & c;
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* If no special forms are recognized, then consider
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * the backslash to generically escape the next character. */
9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        offset16[0] = offset;
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return c;
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert all escapes in a given string using unescapeAt().
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @exception IllegalArgumentException if an invalid escape is
9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * seen.
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String unescape(String s) {
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = new StringBuilder();
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] pos = new int[1];
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length(); ) {
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i++);
9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == '\\') {
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos[0] = i;
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int e = unescapeAt(s, pos);
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (e < 0) {
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new IllegalArgumentException("Invalid escape sequence " +
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            s.substring(i-1, Math.min(i+8, s.length())));
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.appendCodePoint(e);
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i = pos[0];
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append(c);
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf.toString();
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert all escapes in a given string using unescapeAt().
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Leave invalid escape sequences unchanged.
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String unescapeLeniently(String s) {
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = new StringBuilder();
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] pos = new int[1];
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<s.length(); ) {
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = s.charAt(i++);
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == '\\') {
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos[0] = i;
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int e = unescapeAt(s, pos);
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (e < 0) {
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.append(c);
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.appendCodePoint(e);
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    i = pos[0];
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buf.append(c);
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf.toString();
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Convert a value to uppercase hex digits, zero-padded to at least
     * 4 digits.  E.g., hex('A') => "0041" and hex('a') => "0061".
     * Shorthand for hex(ch, 4): values that need more than 4 digits are
     * not truncated.
     *
     * @param ch the value to format
     * @return the hex representation, without a 0x prefix
     */
    public static String hex(long ch) {
        return hex(ch, 4);
    }
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Supplies a zero-padded hex representation of an integer (without 0x)
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static public String hex(long i, int places) {
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (i == Long.MIN_VALUE) return "-8000000000000000";
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean negative = i < 0;
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (negative) {
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = -i;
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (result.length() < places) {
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result = "0000000000000000".substring(result.length(),places) + result;
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (negative) {
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return '-' + result;
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Convert a string to comma-separated groups of at least 4 hex
     * uppercase digits, one group per code point.
     * E.g., hex("AB") => "0041,0042" and hex("ab") => "0061,0062".
     *
     * @param s the text to format
     * @return the comma-separated hex groups
     */
    public static String hex(CharSequence s) {
        return hex(s, 4, ",", true, new StringBuilder()).toString();
    }
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Convert a string to separated groups of hex uppercase
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * digits.  E.g., hex('ab'...) => "0041,0042".  Append the output
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to the given Appendable.
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (useCodePoints) {
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int cp;
10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    cp = Character.codePointAt(s, i);
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (i != 0) {
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        result.append(separator);
10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(hex(cp,width));
10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (int i = 0; i < s.length(); ++i) {
10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (i != 0) {
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        result.append(separator);
10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(hex(s.charAt(i),width));
10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String hex(byte[] o, int start, int end, String separator) {
10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder();
10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //int ch;
10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = start; i < end; ++i) {
10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller          if (i != 0) result.append(separator);
10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller          result.append(hex(o[i]));
10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.toString();
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      }
10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Convert a string to groups of hex uppercase digits, one group per
     * code point, joined by the given separator.
     * E.g., hex("ab", 4, ",") => "0061,0062".
     *
     * @param s the text to format
     * @param width the minimum number of digits in each group
     * @param separator the text placed between consecutive groups
     * @return the separated hex groups
     */
    public static <S extends CharSequence> String hex(S s, int width, S separator) {
        return hex(s, width, separator, true, new StringBuilder()).toString();
    }
10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Split a string into pieces based on the given divider character
10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the string to split
10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param divider the character on which to split.  Occurrences of
10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * this character are not included in the output
10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param output an array to receive the substrings between
10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * instances of divider.  It must be large enough on entry to
10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * accomodate all output.  Adjacent instances of the divider
10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character will place empty strings into output.  Before
10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returning, output is padded out with empty strings.
10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void split(String s, char divider, String[] output) {
10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int last = 0;
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int current = 0;
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = 0; i < s.length(); ++i) {
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s.charAt(i) == divider) {
10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                output[current++] = s.substring(last,i);
10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                last = i+1;
10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        output[current++] = s.substring(last,i);
10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (current < output.length) {
10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            output[current++] = "";
10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Split a string into pieces based on the given divider character
10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s the string to split
10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param divider the character on which to split.  Occurrences of
10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * this character are not included in the output
10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return output an array to receive the substrings between
10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * instances of divider. Adjacent instances of the divider
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character will place empty strings into output.
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String[] split(String s, char divider) {
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int last = 0;
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ArrayList<String> output = new ArrayList<String>();
10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = 0; i < s.length(); ++i) {
10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s.charAt(i) == divider) {
10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                output.add(s.substring(last,i));
10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                last = i+1;
10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        output.add( s.substring(last,i));
10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return output.toArray(new String[output.size()]);
11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Look up a given string in a string array.  Returns the index at
11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * which the first occurrence of the string was found in the
11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * array, or -1 if it was not found.
11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source the string to search for
11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param target the array of zero or more strings in which to
11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * look for source
11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the index of target at which source first occurs, or -1
11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * if not found
11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int lookup(String source, String[] target) {
11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < target.length; ++i) {
11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (source.equals(target[i])) return i;
11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a single non-whitespace character 'ch', optionally
11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * preceded by whitespace.
11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param id the string to be parsed
11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * offset of the first character to be parsed.  On output, pos[0]
11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * is the index after the last parsed character.  If the parse
11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * fails, pos[0] will be unchanged.
11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param ch the non-whitespace character to be parsed.
11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if 'ch' is seen preceded by zero or more
11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * whitespace characters.
11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static boolean parseChar(String id, int[] pos, char ch) {
11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int start = pos[0];
11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pos[0] = PatternProps.skipWhiteSpace(id, pos[0]);
11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (pos[0] == id.length() ||
11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                id.charAt(pos[0]) != ch) {
11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos[0] = start;
11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ++pos[0];
11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a pattern string starting at offset pos.  Keywords are
11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * matched case-insensitively.  Spaces may be skipped and may be
11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * optional or required.  Integer values may be parsed, and if
11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * they are, they will be returned in the given array.  If
11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * successful, the offset of the next non-space character is
11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * returned.  On failure, -1 is returned.
11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pattern must only contain lowercase characters, which
11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * will match their uppercase equivalents as well.  A space
11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character matches one or more required spaces.  A '~' character
11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * matches zero or more optional spaces.  A '#' character matches
11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * an integer and stores it in parsedInts, which the caller must
11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ensure has enough capacity.
11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param parsedInts array to receive parsed integers.  Caller
11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * must ensure that parsedInts.length is >= the number of '#'
11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * signs in 'pattern'.
11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the position after the last character parsed, or -1 if
11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the parse failed
11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @SuppressWarnings("fallthrough")
11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parsePattern(String rule, int pos, int limit,
11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String pattern, int[] parsedInts) {
11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO Update this to handle surrogates
11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] p = new int[1];
11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int intCount = 0; // number of integers parsed
11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<pattern.length(); ++i) {
11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char cpat = pattern.charAt(i);
11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c;
11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            switch (cpat) {
11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case ' ':
11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (pos >= limit) {
11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = rule.charAt(pos++);
11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (!PatternProps.isWhiteSpace(c)) {
11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // FALL THROUGH to skipWhitespace
11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '~':
11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos = PatternProps.skipWhiteSpace(rule, pos);
11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            case '#':
11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                p[0] = pos;
11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                parsedInts[intCount++] = parseInteger(rule, p, limit);
11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (p[0] == pos) {
11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Syntax error; failed to parse integer
11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                pos = p[0];
11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            default:
11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (pos >= limit) {
11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c != cpat) {
11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return pos;
12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a pattern string within the given Replaceable and a parsing
12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pattern.  Characters are matched literally and case-sensitively
12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * except for the following special characters:
12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ~  zero or more Pattern_White_Space chars
12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If end of pattern is reached with all matches along the way,
12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pos is advanced to the first unparsed index and returned.
12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Otherwise -1 is returned.
12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pat pattern that controls parsing
12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text text to be parsed, starting at index
12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param index offset to first character to parse
12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param limit offset after last character to parse
12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index after last parsed character, or -1 on parse failure.
12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parsePattern(String pat,
12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Replaceable text,
12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index,
12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int limit) {
12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ipat = 0;
12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // empty pattern matches immediately
12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ipat == pat.length()) {
12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return index;
12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cpat = Character.codePointAt(pat, ipat);
12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (index < limit) {
12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c = text.char32At(index);
12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // parse \s*
12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (cpat == '~') {
12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (PatternProps.isWhiteSpace(c)) {
12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    index += UTF16.getCharCount(c);
12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (++ipat == pat.length()) {
12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return index; // success; c unparsed
12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // fall thru; process c again with next cpat
12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // parse literal
12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else if (c == cpat) {
12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int n = UTF16.getCharCount(c);
12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index += n;
12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ipat += n;
12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (ipat == pat.length()) {
12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return index; // success; c parsed
12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // fall thru; get next cpat
12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // match failure of literal
12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            else {
12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cpat = UTF16.charAt(pat, ipat);
12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1; // text ended before end of pat
12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse an integer at pos, either of the form \d+ or of the form
12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or octal format.
12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUTPUT parameter.  On input, the first
12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character to parse.  On output, the character after the last
12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * parsed character.
12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parseInteger(String rule, int[] pos, int limit) {
12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int count = 0;
12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int value = 0;
12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p = pos[0];
12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int radix = 10;
12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (rule.regionMatches(true, p, "0x", 0, 2)) {
12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p += 2;
12902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            radix = 16;
12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if (p < limit && rule.charAt(p) == '0') {
12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p++;
12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            count = 1;
12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            radix = 8;
12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (p < limit) {
12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int d = UCharacter.digit(rule.charAt(p++), radix);
12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (d < 0) {
13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                --p;
13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++count;
13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int v = (value * radix) + d;
13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (v <= value) {
13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If there are too many input digits, at some point
13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // the value will go negative, e.g., if we have seen
13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // "0x8000000" already and there is another '0', when
13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // we parse the next 0 the value will go negative.
13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            value = v;
13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (count > 0) {
13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pos[0] = p;
13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return value;
13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a Unicode identifier from the given string at the given
13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * position.  Return the identifier, or null if there is no
13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * identifier.
13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param str the string to parse
13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * first character to examine.  It must be less than str.length(),
13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and it must not point to a whitespace character.  That is, must
13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * have pos[0] < str.length().  On
13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * OUTPUT, the position after the last parsed character.
13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the Unicode identifier, or null if there is no valid
13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * identifier at pos[0].
13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String parseUnicodeIdentifier(String str, int[] pos) {
13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(pos[0] < str.length());
13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buf = new StringBuilder();
13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p = pos[0];
13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (p < str.length()) {
13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int ch = Character.codePointAt(str, p);
13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (buf.length() == 0) {
13402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (UCharacter.isUnicodeIdentifierStart(ch)) {
13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.appendCodePoint(ch);
13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return null;
13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (UCharacter.isUnicodeIdentifierPart(ch)) {
13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.appendCodePoint(ch);
13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p += UTF16.getCharCount(ch);
13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pos[0] = p;
13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buf.toString();
13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static final char DIGITS[] = {
13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        'U', 'V', 'W', 'X', 'Y', 'Z'
13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append the digits of a positive integer to the given
13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <code>Appendable</code> in the given radix. This is
13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * done recursively since it is easiest to generate the low-
13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * order digit first, but it must be appended last.
13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param result is the <code>Appendable</code> to append to
13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param n is the positive integer
13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param radix is the radix, from 2 to 36 inclusive
13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minDigits is the minimum number of digits to append.
13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static <T extends Appendable> void recursiveAppendNumber(T result, int n,
13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int radix, int minDigits)
13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int digit = n % radix;
13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n >= radix || minDigits > 1) {
13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append(DIGITS[digit]);
13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append a number to the given Appendable in the given radix.
13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * radices 11 through 36.
13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param result the digits of the number are appended here
13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param n the number to be converted to digits; may be negative.
13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If negative, a '-' is prepended to the digits.
13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param radix a radix from 2 to 36 inclusive.
13992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minDigits the minimum number of digits, not including
14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * any '-', to produce.  Values less than 2 have no effect.  One
14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * digit is always emitted regardless of this parameter.
14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a reference to result
14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Appendable> T appendNumber(T result, int n,
14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int radix, int minDigits)
14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (radix < 2 || radix > 36) {
14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException("Illegal radix " + radix);
14102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int abs = n;
14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n < 0) {
14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                abs = -n;
14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append("-");
14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            recursiveAppendNumber(result, abs, radix, minDigits);
14212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return result;
14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse an unsigned 31-bit integer at the given offset.  Use
14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * UCharacter.digit() to parse individual characters into digits.
14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text the text to be parsed
14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * offset within text at which to start parsing; it should point
14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * to a valid digit.  On exit, pos[0] is the offset after the last
14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * parsed character.  If the parse failed, it will be unchanged on
14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * exit.  Must be >= 0 on entry.
14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param radix the radix in which to parse; must be >= 2 and <=
14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * 36.
14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return a non-negative parsed number, or -1 upon parse failure.
14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse fails if there are no digits, that is, if pos[0] does not
14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * point to a valid digit on entry, or if the number to be parsed
14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * does not fit into a 31-bit unsigned integer.
14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int parseNumber(String text, int[] pos, int radix) {
14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(pos[0] >= 0);
14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(radix >= 2);
14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // assert(radix <= 36);
14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int n = 0;
14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p = pos[0];
14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (p < text.length()) {
14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int ch = Character.codePointAt(text, p);
14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int d = UCharacter.digit(ch, radix);
14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (d < 0) {
14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = radix*n + d;
14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // ASSUME that when a 32-bit integer overflows it becomes
14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // negative.  E.g., 214748364 * 10 + 8 => negative value.
14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n < 0) {
14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return -1;
14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ++p;
14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (p == pos[0]) {
14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pos[0] = p;
14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return n;
14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Return true if the character is NOT printable ASCII.  The tab,
14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * newline and linefeed characters are considered unprintable.
14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static boolean isUnprintable(int c) {
14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //0x20 = 32 and 0x7E = 126
14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !(c >= 0x20 && c <= 0x7E);
14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Escape unprintable characters using <backslash>uxxxx notation
14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * above.  If the character is printable ASCII, then do nothing
14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and return FALSE.  Otherwise, append the escaped notation and
14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * return TRUE.
14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (isUnprintable(c)) {
14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append('\\');
14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if ((c & ~0xFFFF) != 0) {
14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append('U');
14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>28)]);
14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>24)]);
14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>20)]);
14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append(DIGITS[0xF&(c>>16)]);
14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.append('u');
15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&(c>>12)]);
15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&(c>>8)]);
15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&(c>>4)]);
15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result.append(DIGITS[0xF&c]);
15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (IOException e) {
15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalIcuArgumentException(e);
15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the index of the first character in a set, ignoring quoted text.
15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * found by a search for "h".  Unlike String.indexOf(), this method searches
15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * not for a single character, but for any character of the string
15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <code>setOfChars</code>.
15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param text text to be searched
15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param start the beginning index, inclusive; <code>0 <= start
15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <= limit</code>.
15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param limit the ending index, exclusive; <code>start <= limit
15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <= text.length()</code>.
15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param setOfChars string with one or more distinct characters
15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return Offset of the first character in <code>setOfChars</code>
15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * found, or -1 if not found.
15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @see String#indexOf
15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int quotedIndexOf(String text, int start, int limit,
15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String setOfChars) {
15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=start; i<limit; ++i) {
15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = text.charAt(i);
15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == BACKSLASH) {
15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++i;
15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (c == APOSTROPHE) {
15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (++i < limit
15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        && text.charAt(i) != APOSTROPHE) {}
15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (setOfChars.indexOf(c) >= 0) {
15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append a character to a rule that is being built up.  To flush
15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the quoteBuf to rule, make one final call with isLiteral == true.
15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If there is no final character, pass in (int)-1 as c.
15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param rule the string to append the character to
15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c the character to append, or (int)-1 if none.
15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param isLiteral if true, then the given character should not be
15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * quoted or escaped.  Usually this means it is a syntactic element
15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * such as > or $
15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param escapeUnprintable if true, then unprintable characters
15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * should be escaped using escapeUnprintable().  These escapes will
15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * appear outside of quotes.
15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param quoteBuf a buffer which is used to build up quoted
15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * substrings.  The caller should initially supply an empty buffer,
15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and thereafter should not modify the buffer.  The buffer should be
15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * cleared out by, at the end, calling this method with a literal
15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * character (which may be -1).
15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void appendToRule(StringBuffer rule,
15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c,
15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean isLiteral,
15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable,
15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuffer quoteBuf) {
15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If we are escaping unprintables, then escape them outside
15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // quotes.  \\u and \\U are not recognized within quotes.  The same
15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // logic applies to literals, but literals are never escaped.
15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (isLiteral ||
15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (escapeUnprintable && Utility.isUnprintable(c))) {
15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (quoteBuf.length() > 0) {
15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We prefer backslash APOSTROPHE to double APOSTROPHE
15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // (more readable, less similar to ") so if there are
15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // double APOSTROPHEs at the ends, we pull them outside
15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // of the quote.
15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If the first thing in the quoteBuf is APOSTROPHE
15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // (doubled) then pull it out.
15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (quoteBuf.length() >= 2 &&
15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(0) == APOSTROPHE &&
15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(1) == APOSTROPHE) {
15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(BACKSLASH).append(APOSTROPHE);
15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    quoteBuf.delete(0, 2);
15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If the last thing in the quoteBuf is APOSTROPHE
15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // (doubled) then remove and count it and add it after.
15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int trailingCount = 0;
15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (quoteBuf.length() >= 2 &&
15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    quoteBuf.setLength(quoteBuf.length()-2);
15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++trailingCount;
15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (quoteBuf.length() > 0) {
15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(APOSTROPHE);
15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(quoteBuf);
15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(APOSTROPHE);
16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    quoteBuf.setLength(0);
16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (trailingCount-- > 0) {
16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.append(BACKSLASH).append(APOSTROPHE);
16042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
16052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c != -1) {
16072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /* Since spaces are ignored during parsing, they are
16082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * emitted only for readability.  We emit one here
16092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * only if there isn't already one at the end of the
16102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * rule.
16112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
16122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (c == ' ') {
16132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int len = rule.length();
16142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (len > 0 && rule.charAt(len-1) != ' ') {
16152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        rule.append(' ');
16162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
16172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
16182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    rule.appendCodePoint(c);
16192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
16202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Escape ' and '\' and don't begin a quote just for them
16242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if (quoteBuf.length() == 0 &&
16252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (c == APOSTROPHE || c == BACKSLASH)) {
16262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rule.append(BACKSLASH).append((char)c);
16272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Specials (printable ascii that isn't [0-9a-zA-Z]) and
16302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // whitespace need quoting.  Also append stuff to quotes if we are
16312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // building up a quoted substring already.
16322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if (quoteBuf.length() > 0 ||
16332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (c >= 0x0021 && c <= 0x007E &&
16342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
16352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
16372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                PatternProps.isWhiteSpace(c)) {
16382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            quoteBuf.appendCodePoint(c);
16392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Double ' within a quote
16402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c == APOSTROPHE) {
16412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                quoteBuf.append((char)c);
16422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Otherwise just append
16462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else {
16472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rule.appendCodePoint(c);
16482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Append the given string to the rule.  Calls the single-character
16532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * version of appendToRule for each character.
16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void appendToRule(StringBuffer rule,
16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String text,
16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean isLiteral,
16582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable,
16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuffer quoteBuf) {
16602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i=0; i<text.length(); ++i) {
16612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Okay to process in 16-bit code units here
16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
16632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Given a matcher reference, which may be null, append its
16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * pattern as a literal to the given rule.
16692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void appendToRule(StringBuffer rule,
16712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeMatcher matcher,
16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean escapeUnprintable,
16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuffer quoteBuf) {
16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (matcher != null) {
16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            appendToRule(rule, matcher.toPattern(escapeUnprintable),
16762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    true, escapeUnprintable, quoteBuf);
16772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compares 2 unsigned integers
16822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source 32 bit unsigned integer
16832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param target 32 bit unsigned integer
16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return 0 if equals, 1 if source is greater than target and -1
16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         otherwise
16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int compareUnsigned(int source, int target)
16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        source += MAGIC_UNSIGNED;
16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        target += MAGIC_UNSIGNED;
16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source < target) {
16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
1693f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        }
16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        else if (source > target) {
16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 1;
16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return 0;
16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Find the highest bit in a positive integer. This is done
17022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * by doing a binary search through the bits.
17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param n is the integer
17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the bit number of the highest bit, with 0 being
17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the low order bit, or -1 if <code>n</code> is not positive
17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final byte highBit(int n)
17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    {
17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n <= 0) {
17122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte bit = 0;
17162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 16) {
17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 16;
17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 16;
17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 8) {
17232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 8;
17242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 8;
17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 4) {
17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 4;
17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 4;
17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 2) {
17332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 2;
17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 2;
17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (n >= 1 << 1) {
17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n >>= 1;
17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bit += 1;
17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return bit;
17432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility method to take a int[] containing codepoints and return
1746f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert     * a string representation with code units.
17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String valueOf(int[]source){
17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: Investigate why this method is not on UTF16 class
17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder(source.length);
17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int i=0; i<source.length; i++){
17522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.appendCodePoint(source[i]);
17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.toString();
17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Utility to duplicate a string count times
17602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param s String to be duplicated.
17612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param count Number of times to duplicate a string.
17622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String repeat(String s, int count) {
17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (count <= 0) return "";
17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (count == 1) return s;
17662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder result = new StringBuilder();
17672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < count; ++i) {
17682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.append(s);
17692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result.toString();
17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1772f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String[] splitString(String src, String target) {
17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src.split("\\Q" + target + "\\E");
17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Split the string at runs of ascii whitespace characters.
17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String[] splitWhitespace(String src) {
17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src.split("\\s+");
17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a list of hex numbers and return a string
17862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param string String of hex numbers.
17872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minLength Minimal length.
17882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param separator Separator.
17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A string from hex numbers.
17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String fromHex(String string, int minLength, String separator) {
17922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+"));
17932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1794f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
17952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Parse a list of hex numbers and return a string
17972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param string String of hex numbers.
17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param minLength Minimal length.
17992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param separator Separator.
18002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return A string from hex numbers.
18012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static String fromHex(String string, int minLength, Pattern separator) {
18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder buffer = new StringBuilder();
18042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String[] parts = separator.split(string);
18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (String part : parts) {
18062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (part.length() < minLength) {
18072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException("code point too short: " + part);
18082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int cp = Integer.parseInt(part, 16);
18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.appendCodePoint(cp);
18112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return buffer.toString();
18132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
18142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
1815