12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 1996-2015, International Business Machines Corporation and * 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved. * 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl; 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException; 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Locale; 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.regex.Pattern; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Replaceable; 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeMatcher; 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 221537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/** 231537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android 24836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */ 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class Utility { 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final char APOSTROPHE = '\''; 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final char BACKSLASH = '\\'; 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int MAGIC_UNSIGNED = 0x80000000; 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two Object[]s. 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayEquals(Object[] source, Object target) { 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source == null) return (target == null); 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!(target instanceof Object[])) return false; 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Object[] targ = (Object[]) target; 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (source.length == targ.length 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && arrayRegionMatches(source, 0, targ, 0, source.length)); 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two int[]s 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayEquals(int[] source, Object target) { 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source == null) return (target == null); 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!(target instanceof int[])) return false; 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] targ = (int[]) target; 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (source.length == targ.length 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && arrayRegionMatches(source, 0, targ, 0, source.length)); 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two double[]s 572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayEquals(double[] source, Object target) { 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source == null) return (target == null); 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!(target instanceof double[])) return false; 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller double[] targ = (double[]) target; 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (source.length == targ.length 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && arrayRegionMatches(source, 0, targ, 0, source.length)); 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayEquals(byte[] source, Object target) { 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source == null) return (target == null); 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!(target instanceof byte[])) return false; 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte[] targ = (byte[]) target; 702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (source.length == targ.length 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && arrayRegionMatches(source, 0, targ, 0, source.length)); 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two Object[]s 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayEquals(Object source, Object target) { 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source == null) return (target == null); 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for some reason, the correct arrayEquals is not being called 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // so do it by hand for now. 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source instanceof Object[]) 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return(arrayEquals((Object[]) source,target)); 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source instanceof int[]) 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return(arrayEquals((int[]) source,target)); 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source instanceof double[]) 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return(arrayEquals((double[]) source, target)); 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source instanceof byte[]) 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return(arrayEquals((byte[]) source,target)); 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return source.equals(target); 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two Object[]s 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System. 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param len the length to compare. 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The start indices and start+len must be valid. 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayRegionMatches(Object[] source, int sourceStart, 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Object[] target, int targetStart, 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len) 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceEnd = sourceStart + len; 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int delta = targetStart - sourceStart; 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = sourceStart; i < sourceEnd; i++) { 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!arrayEquals(source[i],target[i + delta])) 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two Object[]s 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System. 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param len the length to compare. 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The start indices and start+len must be valid. 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayRegionMatches(char[] source, int sourceStart, 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char[] target, int targetStart, 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len) 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceEnd = sourceStart + len; 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int delta = targetStart - sourceStart; 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = sourceStart; i < sourceEnd; i++) { 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source[i]!=target[i + delta]) 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 131f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert /** 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two int[]s. 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param len the length to compare. 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The start indices and start+len must be valid. 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayRegionMatches(int[] source, int sourceStart, 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] target, int targetStart, 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len) 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceEnd = sourceStart + len; 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int delta = targetStart - sourceStart; 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = sourceStart; i < sourceEnd; i++) { 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source[i] != target[i + delta]) 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility to compare two arrays of doubles. 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param len the length to compare. 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The start indices and start+len must be valid. 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Ought to be in System 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayRegionMatches(double[] source, int sourceStart, 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller double[] target, int targetStart, 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len) 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceEnd = sourceStart + len; 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int delta = targetStart - sourceStart; 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = sourceStart; i < sourceEnd; i++) { 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source[i] != target[i + delta]) 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean arrayRegionMatches(byte[] source, int sourceStart, 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte[] target, int targetStart, int len){ 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int sourceEnd = sourceStart + len; 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int delta = targetStart - sourceStart; 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = sourceStart; i < sourceEnd; i++) { 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source[i] != target[i + delta]) 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 180f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * Trivial reference equality. 181f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * This method should help document that we really want == not equals(), 182f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * and to have a single place to suppress warnings from static analysis tools. 183f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert */ 184f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert public static final boolean sameObjects(Object a, Object b) { 185f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return a == b; 186f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 187f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 188f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert /** 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility. Does null checks on objects, then calls equals. 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public final static boolean objectEquals(Object a, Object b) { 192f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return a == null ? 193f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert b == null ? true : false : 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b == null ? false : a.equals(b); 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 196f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility. Does null checks on objects, then calls compare. 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T extends Comparable<T>> int checkCompare(T a, T b) { 201f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return a == null ? 202f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert b == null ? 0 : -1 : 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b == null ? 1 : a.compareTo(b); 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convenience utility. Does null checks on object, then calls hashCode. 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int checkHash(Object a) { 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return a == null ? 0 : a.hashCode(); 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 212f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The ESCAPE character is used during run-length encoding. It signals 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a run of identical chars. 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final char ESCAPE = '\uA5A5'; 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The ESCAPE_BYTE character is used during run-length encoding. It signals 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * a run of identical bytes. 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final byte ESCAPE_BYTE = (byte)0xA5; 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct a string representing an int array. Use run-length encoding. 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A character represents itself, unless it is the ESCAPE character. Then 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the following notations are possible: 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE ESCAPE ESCAPE literal 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE n c n instances of character c 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Since an encoded run occupies 3 characters, we only encode runs of 4 or 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If we encounter a run where n == ESCAPE, we represent this as: 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * c ESCAPE n-1 c 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The ESCAPE value is chosen so as not to collide with commonly 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * seen values. 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final String arrayToRLEString(int[] a) { 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer = new StringBuilder(); 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, a.length); 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runValue = a[0]; 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runLength = 1; 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=1; i<a.length; ++i) { 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int s = a[i]; 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s == runValue && runLength < 0xFFFF) { 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++runLength; 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, runValue, runLength); 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runValue = s; 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runLength = 1; 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, runValue, runLength); 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct a string representing a short array. Use run-length encoding. 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A character represents itself, unless it is the ESCAPE character. Then 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the following notations are possible: 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE ESCAPE ESCAPE literal 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE n c n instances of character c 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Since an encoded run occupies 3 characters, we only encode runs of 4 or 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If we encounter a run where n == ESCAPE, we represent this as: 2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * c ESCAPE n-1 c 2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The ESCAPE value is chosen so as not to collide with commonly 2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * seen values. 2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final String arrayToRLEString(short[] a) { 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer = new StringBuilder(); 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]); 2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char) (a.length >> 16)); 2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char) a.length); 2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller short runValue = a[0]; 2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runLength = 1; 2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=1; i<a.length; ++i) { 2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller short s = a[i]; 2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s == runValue && runLength < 0xFFFF) ++runLength; 2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, runValue, runLength); 2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runValue = s; 2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runLength = 1; 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, runValue, runLength); 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct a string representing a char array. Use run-length encoding. 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * A character represents itself, unless it is the ESCAPE character. Then 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the following notations are possible: 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE ESCAPE ESCAPE literal 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE n c n instances of character c 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Since an encoded run occupies 3 characters, we only encode runs of 4 or 2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If we encounter a run where n == ESCAPE, we represent this as: 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * c ESCAPE n-1 c 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The ESCAPE value is chosen so as not to collide with commonly 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * seen values. 3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final String arrayToRLEString(char[] a) { 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer = new StringBuilder(); 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char) (a.length >> 16)); 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char) a.length); 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char runValue = a[0]; 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runLength = 1; 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=1; i<a.length; ++i) { 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char s = a[i]; 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s == runValue && runLength < 0xFFFF) ++runLength; 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, (short)runValue, runLength); 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runValue = s; 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runLength = 1; 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, (short)runValue, runLength); 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct a string representing a byte array. Use run-length encoding. 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Two bytes are packed into a single char, with a single extra zero byte at 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the end if needed. A byte represents itself, unless it is the 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE_BYTE. Then the following notations are possible: 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ESCAPE_BYTE n b n instances of byte b 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Since an encoded run occupies 3 bytes, we only encode runs of 4 or 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If we encounter a run where n == ESCAPE_BYTE, we represent this as: 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * b ESCAPE_BYTE n-1 b 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * The ESCAPE_BYTE value is chosen so as not to collide with commonly 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * seen values. 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final String arrayToRLEString(byte[] a) { 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer = new StringBuilder(); 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char) (a.length >> 16)); 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char) a.length); 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte runValue = a[0]; 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runLength = 1; 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte[] state = new byte[2]; 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=1; i<a.length; ++i) { 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte b = a[i]; 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (b == runValue && runLength < 0xFF) ++runLength; 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, runValue, runLength, state); 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runValue = b; 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runLength = 1; 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller encodeRun(buffer, runValue, runLength, state); 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We must save the final byte, if there is one, by padding 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // an extra zero. 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state); 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Encode a run, possibly a degenerate run (of < 4 values). 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param length The length of the run; must be > 0 && <= 0xFFFF. 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) { 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (length < 4) { 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<length; ++j) { 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (value == ESCAPE) { 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, value); 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, value); 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 376f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (length == ESCAPE) { 377f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (value == ESCAPE) { 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, ESCAPE); 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, value); 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --length; 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, ESCAPE); 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, length); 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendInt(buffer, value); // Don't need to escape this value 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final <T extends Appendable> void appendInt(T buffer, int value) { 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(value >>> 16)); 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char)(value & 0xFFFF)); 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalIcuArgumentException(e); 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Encode a run, possibly a degenerate run (of < 4 values). 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param length The length of the run; must be > 0 && <= 0xFFFF. 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) { 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 404f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert char valueChar = (char) value; 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (length < 4) { 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<length; ++j) { 407f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (valueChar == ESCAPE) { 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(ESCAPE); 409f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 410f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert buffer.append(valueChar); 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 414f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (length == ESCAPE) { 415f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (valueChar == ESCAPE) { 416f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert buffer.append(ESCAPE); 417f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 418f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert buffer.append(valueChar); 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --length; 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(ESCAPE); 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append((char) length); 423f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert buffer.append(valueChar); // Don't need to escape this value 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalIcuArgumentException(e); 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Encode a run, possibly a degenerate run (of < 4 values). 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param length The length of the run; must be > 0 && <= 0xFF. 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length, 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte[] state) { 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (length < 4) { 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<length; ++j) { 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state); 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendEncodedByte(buffer, value, state); 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 443f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if ((byte)length == ESCAPE_BYTE) { 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state); 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendEncodedByte(buffer, value, state); 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --length; 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendEncodedByte(buffer, ESCAPE_BYTE, state); 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendEncodedByte(buffer, (byte)length, state); 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendEncodedByte(buffer, value, state); // Don't need to escape this value 4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append a byte to the given Appendable, packing two bytes into each 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character. The state parameter maintains intermediary data between 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * calls. 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param state A two-element array, with state[0] == 0 if this is the 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * first byte of a pair, or state[0] != 0 if this is the second byte 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * of a pair, in which case state[1] is the first byte. 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value, 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte[] state) { 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (state[0] != 0) { 466f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert char c = (char) ((state[1] << 8) | ((value) & 0xFF)); 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(c); 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state[0] = 0; 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state[0] = 1; 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state[1] = value; 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalIcuArgumentException(e); 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct an array of ints from a run-length encoded string. 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final int[] RLEStringToIntArray(String s) { 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = getInt(s, 0); 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] array = new int[length]; 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ai = 0, i = 1; 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int maxI = s.length() / 2; 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (ai < length && i < maxI) { 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = getInt(s, i++); 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ESCAPE) { 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = getInt(s, i++); 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ESCAPE) { 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = c; 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runLength = c; 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runValue = getInt(s, i++); 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<runLength; ++j) { 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = runValue; 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = c; 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ai != length || i != maxI) { 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalStateException("Bad run-length encoded int array"); 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return array; 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final int getInt(String s, int i) { 515f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert return ((s.charAt(2*i)) << 16) | s.charAt(2*i+1); 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct an array of shorts from a run-length encoded string. 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final short[] RLEStringToShortArray(String s) { 522f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int length = ((s.charAt(0)) << 16) | (s.charAt(1)); 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller short[] array = new short[length]; 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ai = 0; 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=2; i<s.length(); ++i) { 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = s.charAt(i); 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ESCAPE) { 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(++i); 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ESCAPE) { 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = (short) c; 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 532f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int runLength = c; 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller short runValue = (short) s.charAt(++i); 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<runLength; ++j) array[ai++] = runValue; 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = (short) c; 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ai != length) 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalStateException("Bad run-length encoded short array"); 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return array; 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct an array of shorts from a run-length encoded string. 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final char[] RLEStringToCharArray(String s) { 552f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int length = ((s.charAt(0)) << 16) | (s.charAt(1)); 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char[] array = new char[length]; 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ai = 0; 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=2; i<s.length(); ++i) { 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = s.charAt(i); 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ESCAPE) { 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(++i); 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ESCAPE) { 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = c; 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 562f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int runLength = c; 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char runValue = s.charAt(++i); 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<runLength; ++j) array[ai++] = runValue; 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = c; 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ai != length) 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalStateException("Bad run-length encoded short array"); 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return array; 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Construct an array of bytes from a run-length encoded string. 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final byte[] RLEStringToByteArray(String s) { 582f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert int length = ((s.charAt(0)) << 16) | (s.charAt(1)); 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte[] array = new byte[length]; 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean nextChar = true; 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = 0; 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int node = 0; 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int runLength = 0; 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i = 2; 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int ai=0; ai<length; ) { 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This part of the loop places the next byte into the local 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // variable 'b' each time through the loop. It keeps the 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // current character in 'c' and uses the boolean 'nextChar' 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to see if we've taken both bytes out of 'c' yet. 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte b; 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (nextChar) { 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(i++); 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = (byte) (c >> 8); 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextChar = false; 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller b = (byte) (c & 0xFF); 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextChar = true; 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This part of the loop is a tiny state machine which handles 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the parsing of the run-length encoding. This would be simpler 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if we could look ahead, but we can't, so we use 'node' to 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // move between three nodes in the state machine. 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (node) { 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 0: 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Normal idle node 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (b == ESCAPE_BYTE) { 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = 1; 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = b; 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 1: 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have seen one ESCAPE_BYTE; we expect either a second 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // one, or a run length and value. 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (b == ESCAPE_BYTE) { 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller array[ai++] = ESCAPE_BYTE; 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = 0; 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller runLength = b; 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Interpret signed byte as unsigned 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (runLength < 0) runLength += 0x100; 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = 2; 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 2: 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have seen an ESCAPE_BYTE and length byte. We interpret 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the next byte as the value to be repeated. 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j=0; j<runLength; ++j) array[ai++] = b; 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node = 0; 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (node != 0) 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalStateException("Bad run-length encoded byte array"); 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i != s.length()) 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalStateException("Excess data in RLE byte array string"); 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return array; 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public String LINE_SEPARATOR = System.getProperty("line.separator"); 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Format a String for representation in a source file. This includes 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * breaking it into lines and escaping characters using octal notation 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * when necessary (control characters and double quotes). 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final String formatForSource(String s) { 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer = new StringBuilder(); 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<s.length();) { 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i > 0) buffer.append('+').append(LINE_SEPARATOR); 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(" \""); 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = 11; 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (i<s.length() && count<80) { 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = s.charAt(i++); 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < '\u0020' || c == '"' || c == '\\') { 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '\n') { 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\n"); 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count += 2; 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c == '\t') { 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\t"); 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count += 2; 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c == '\r') { 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\r"); 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count += 2; 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Represent control characters, backslash and double quote 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // using octal notation; otherwise the string we form 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // won't compile, since Unicode escape sequences are 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // processed before tokenization. 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append('\\'); 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0070) >> 3]); 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0007)]); 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count += 4; 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (c <= '\u007E') { 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(c); 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count += 1; 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\u"); 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]); 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]); 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]); 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0x000F)]); 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count += 6; 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append('"'); 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7', 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller '8','9','A','B','C','D','E','F'}; 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Format a String for representation in a source file. Like 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * formatForSource but does not do line breaking. 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public final String format1ForSource(String s) { 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer = new StringBuilder(); 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\""); 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<s.length();) { 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = s.charAt(i++); 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c < '\u0020' || c == '"' || c == '\\') { 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '\n') { 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\n"); 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c == '\t') { 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\t"); 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c == '\r') { 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\r"); 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Represent control characters, backslash and double quote 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // using octal notation; otherwise the string we form 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // won't compile, since Unicode escape sequences are 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // processed before tokenization. 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append('\\'); 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0070) >> 3]); 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0007)]); 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (c <= '\u007E') { 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(c); 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append("\\u"); 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]); 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]); 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]); 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append(HEX_DIGIT[(c & 0x000F)]); 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.append('"'); 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert characters outside the range U+0020 to U+007F to 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Unicode escapes, and convert backslash to a double backslash. 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final String escape(String s) { 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buf = new StringBuilder(); 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<s.length(); ) { 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = Character.codePointAt(s, i); 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i += UTF16.getCharCount(c); 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c >= ' ' && c <= 0x007F) { 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '\\') { 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append("\\\\"); // That is, "\\" 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append((char)c); 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean four = c <= 0xFFFF; 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append(four ? "\\u" : "\\U"); 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append(hex(c, four ? 4 : 8)); 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf.toString(); 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static private final char[] UNESCAPE_MAP = { 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*" 0x22, 0x22 */ 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*' 0x27, 0x27 */ 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*? 0x3F, 0x3F */ 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*\ 0x5C, 0x5C */ 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*a*/ 0x61, 0x07, 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*b*/ 0x62, 0x08, 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*e*/ 0x65, 0x1b, 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*f*/ 0x66, 0x0c, 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*n*/ 0x6E, 0x0a, 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*r*/ 0x72, 0x0d, 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*t*/ 0x74, 0x09, 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /*v*/ 0x76, 0x0b 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert an escape to a 32-bit code point value. We attempt 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to parallel the icu4c unescapeAt() function. 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param offset16 an array containing offset to the character 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <em>after</em> the backslash. Upon return offset16[0] will 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * be updated to point after the escape sequence. 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return character value from 0 to 10FFFF, or -1 on error. 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int unescapeAt(String s, int[] offset16) { 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int result = 0; 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int n = 0; 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int minDig = 0; 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int maxDig = 0; 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int bitsPerDigit = 4; 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int dig; 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean braces = false; 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Check that offset is in range */ 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int offset = offset16[0]; 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int length = s.length(); 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (offset < 0 || offset >= length) { 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Fetch first UChar after '\\' */ 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = Character.codePointAt(s, offset); 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset += UTF16.getCharCount(c); 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Convert hexadecimal and octal escapes */ 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (c) { 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 'u': 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller minDig = maxDig = 4; 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 'U': 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller minDig = maxDig = 8; 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case 'x': 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller minDig = 1; 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) { 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++offset; 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller braces = true; 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller maxDig = 8; 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller maxDig = 2; 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dig = UCharacter.digit(c, 8); 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (dig >= 0) { 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller minDig = 1; 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller maxDig = 3; 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = 1; /* Already have first octal digit */ 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bitsPerDigit = 3; 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = dig; 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (minDig != 0) { 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (offset < length && n < maxDig) { 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = UTF16.charAt(s, offset); 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16); 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (dig < 0) { 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = (result << bitsPerDigit) | dig; 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset += UTF16.getCharCount(c); 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++n; 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n < minDig) { 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (braces) { 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c != 0x7D /*}*/) { 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++offset; 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (result < 0 || result >= 0x110000) { 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If an escape sequence specifies a lead surrogate, see 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // if there is a trail surrogate after it, either as an 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // escape or as a literal. If so, join them up into a 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // supplementary. 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (offset < length && 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UTF16.isLeadSurrogate((char) result)) { 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ahead = offset+1; 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = s.charAt(offset); // [sic] get 16-bit code unit 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '\\' && ahead < length) { 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int o[] = new int[] { ahead }; 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = unescapeAt(s, o); 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ahead = o[0]; 8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (UTF16.isTrailSurrogate((char) c)) { 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset = ahead; 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = Character.toCodePoint((char) result, (char) c); 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset16[0] = offset; 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Convert C-style escapes in table */ 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i=0; i<UNESCAPE_MAP.length; i+=2) { 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == UNESCAPE_MAP[i]) { 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset16[0] = offset; 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return UNESCAPE_MAP[i+1]; 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c < UNESCAPE_MAP[i]) { 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Map \cX to control-X: X & 0x1F */ 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == 'c' && offset < length) { 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = UTF16.charAt(s, offset); 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset16[0] = offset + UTF16.getCharCount(c); 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0x1F & c; 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* If no special forms are recognized, then consider 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the backslash to generically escape the next character. */ 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller offset16[0] = offset; 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return c; 9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert all escapes in a given string using unescapeAt(). 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @exception IllegalArgumentException if an invalid escape is 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * seen. 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String unescape(String s) { 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buf = new StringBuilder(); 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] pos = new int[1]; 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<s.length(); ) { 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = s.charAt(i++); 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '\\') { 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos[0] = i; 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int e = unescapeAt(s, pos); 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (e < 0) { 9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Invalid escape sequence " + 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s.substring(i-1, Math.min(i+8, s.length()))); 9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.appendCodePoint(e); 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = pos[0]; 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append(c); 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf.toString(); 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert all escapes in a given string using unescapeAt(). 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Leave invalid escape sequences unchanged. 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String unescapeLeniently(String s) { 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buf = new StringBuilder(); 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] pos = new int[1]; 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<s.length(); ) { 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = s.charAt(i++); 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == '\\') { 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos[0] = i; 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int e = unescapeAt(s, pos); 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (e < 0) { 9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append(c); 9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.appendCodePoint(e); 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = pos[0]; 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.append(c); 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf.toString(); 9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert a char to 4 hex uppercase digits. E.g., hex('a') => 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * "0041". 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String hex(long ch) { 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return hex(ch, 4); 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Supplies a zero-padded hex representation of an integer (without 0x) 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static public String hex(long i, int places) { 9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i == Long.MIN_VALUE) return "-8000000000000000"; 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean negative = i < 0; 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (negative) { 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = -i; 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH); 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (result.length() < places) { 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result = "0000000000000000".substring(result.length(),places) + result; 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (negative) { 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return '-' + result; 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert a string to comma-separated groups of 4 hex uppercase 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * digits. E.g., hex('ab') => "0041,0042". 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String hex(CharSequence s) { 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return hex(s, 4, ",", true, new StringBuilder()).toString(); 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert a string to separated groups of hex uppercase 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * digits. E.g., hex('ab'...) => "0041,0042". Append the output 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to the given Appendable. 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) { 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (useCodePoints) { 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp; 10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cp = Character.codePointAt(s, i); 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i != 0) { 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(separator); 10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(hex(cp,width)); 10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < s.length(); ++i) { 10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i != 0) { 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(separator); 10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(hex(s.charAt(i),width)); 10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalIcuArgumentException(e); 10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String hex(byte[] o, int start, int end, String separator) { 10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder result = new StringBuilder(); 10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //int ch; 10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = start; i < end; ++i) { 10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (i != 0) result.append(separator); 10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(hex(o[i])); 10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result.toString(); 10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Convert a string to comma-separated groups of 4 hex uppercase 10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * digits. E.g., hex('ab') => "0041,0042". 10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <S extends CharSequence> String hex(S s, int width, S separator) { 10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return hex(s, width, separator, true, new StringBuilder()).toString(); 1050f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Split a string into pieces based on the given divider character 10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the string to split 10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param divider the character on which to split. Occurrences of 10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this character are not included in the output 10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param output an array to receive the substrings between 10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * instances of divider. It must be large enough on entry to 10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * accomodate all output. Adjacent instances of the divider 10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character will place empty strings into output. Before 10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returning, output is padded out with empty strings. 10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void split(String s, char divider, String[] output) { 10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int last = 0; 10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int current = 0; 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i = 0; i < s.length(); ++i) { 10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s.charAt(i) == divider) { 10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller output[current++] = s.substring(last,i); 10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller last = i+1; 10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller output[current++] = s.substring(last,i); 10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (current < output.length) { 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller output[current++] = ""; 10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Split a string into pieces based on the given divider character 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s the string to split 10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param divider the character on which to split. Occurrences of 10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * this character are not included in the output 10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return output an array to receive the substrings between 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * instances of divider. Adjacent instances of the divider 10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character will place empty strings into output. 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String[] split(String s, char divider) { 10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int last = 0; 10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ArrayList<String> output = new ArrayList<String>(); 10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i = 0; i < s.length(); ++i) { 10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s.charAt(i) == divider) { 10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller output.add(s.substring(last,i)); 10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller last = i+1; 10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller output.add( s.substring(last,i)); 10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return output.toArray(new String[output.size()]); 11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Look up a given string in a string array. Returns the index at 11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * which the first occurrence of the string was found in the 11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * array, or -1 if it was not found. 11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param source the string to search for 11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param target the array of zero or more strings in which to 11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * look for source 11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the index of target at which source first occurs, or -1 11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * if not found 11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int lookup(String source, String[] target) { 11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < target.length; ++i) { 11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source.equals(target[i])) return i; 11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse a single non-whitespace character 'ch', optionally 11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * preceded by whitespace. 11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param id the string to be parsed 11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the 11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * offset of the first character to be parsed. On output, pos[0] 11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * is the index after the last parsed character. If the parse 11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * fails, pos[0] will be unchanged. 11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param ch the non-whitespace character to be parsed. 11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return true if 'ch' is seen preceded by zero or more 11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * whitespace characters. 11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean parseChar(String id, int[] pos, char ch) { 11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int start = pos[0]; 11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos[0] = PatternProps.skipWhiteSpace(id, pos[0]); 11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pos[0] == id.length() || 11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller id.charAt(pos[0]) != ch) { 11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos[0] = start; 11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++pos[0]; 11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse a pattern string starting at offset pos. Keywords are 11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * matched case-insensitively. Spaces may be skipped and may be 11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * optional or required. Integer values may be parsed, and if 11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * they are, they will be returned in the given array. If 11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * successful, the offset of the next non-space character is 11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * returned. On failure, -1 is returned. 11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pattern must only contain lowercase characters, which 11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * will match their uppercase equivalents as well. A space 11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character matches one or more required spaces. A '~' character 11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * matches zero or more optional spaces. A '#' character matches 11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * an integer and stores it in parsedInts, which the caller must 11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ensure has enough capacity. 11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param parsedInts array to receive parsed integers. Caller 11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * must ensure that parsedInts.length is >= the number of '#' 11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * signs in 'pattern'. 11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the position after the last character parsed, or -1 if 11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the parse failed 11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller @SuppressWarnings("fallthrough") 11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int parsePattern(String rule, int pos, int limit, 11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String pattern, int[] parsedInts) { 11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO Update this to handle surrogates 11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] p = new int[1]; 11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int intCount = 0; // number of integers parsed 11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<pattern.length(); ++i) { 11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char cpat = pattern.charAt(i); 11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c; 11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (cpat) { 11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case ' ': 11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pos >= limit) { 11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = rule.charAt(pos++); 11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!PatternProps.isWhiteSpace(c)) { 11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // FALL THROUGH to skipWhitespace 11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '~': 11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = PatternProps.skipWhiteSpace(rule, pos); 11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case '#': 11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p[0] = pos; 11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller parsedInts[intCount++] = parseInteger(rule, p, limit); 11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p[0] == pos) { 11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Syntax error; failed to parse integer 11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos = p[0]; 11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (pos >= limit) { 11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = (char) UCharacter.toLowerCase(rule.charAt(pos++)); 11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c != cpat) { 11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return pos; 12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse a pattern string within the given Replaceable and a parsing 12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pattern. Characters are matched literally and case-sensitively 12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * except for the following special characters: 12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * ~ zero or more Pattern_White_Space chars 12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If end of pattern is reached with all matches along the way, 12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pos is advanced to the first unparsed index and returned. 12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Otherwise -1 is returned. 12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pat pattern that controls parsing 12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param text text to be parsed, starting at index 12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param index offset to first character to parse 12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param limit offset after last character to parse 12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return index after last parsed character, or -1 on parse failure. 12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int parsePattern(String pat, 12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Replaceable text, 12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index, 12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int limit) { 12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ipat = 0; 12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // empty pattern matches immediately 12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ipat == pat.length()) { 12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; 12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cpat = Character.codePointAt(pat, ipat); 12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (index < limit) { 12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c = text.char32At(index); 12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // parse \s* 12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (cpat == '~') { 12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (PatternProps.isWhiteSpace(c)) { 12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index += UTF16.getCharCount(c); 12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (++ipat == pat.length()) { 12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; // success; c unparsed 12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fall thru; process c again with next cpat 12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // parse literal 12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (c == cpat) { 12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int n = UTF16.getCharCount(c); 12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index += n; 12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ipat += n; 12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ipat == pat.length()) { 12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return index; // success; c parsed 12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fall thru; get next cpat 12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // match failure of literal 12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cpat = UTF16.charAt(pat, ipat); 12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; // text ended before end of pat 12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse an integer at pos, either of the form \d+ or of the form 12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, 12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * or octal format. 12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pos INPUT-OUTPUT parameter. On input, the first 12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character to parse. On output, the character after the last 12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * parsed character. 12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int parseInteger(String rule, int[] pos, int limit) { 12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = 0; 12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int value = 0; 12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int p = pos[0]; 12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int radix = 10; 12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (rule.regionMatches(true, p, "0x", 0, 2)) { 12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p += 2; 12902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller radix = 16; 12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (p < limit && rule.charAt(p) == '0') { 12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p++; 12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count = 1; 12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller radix = 8; 12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (p < limit) { 12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int d = UCharacter.digit(rule.charAt(p++), radix); 12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (d < 0) { 13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller --p; 13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++count; 13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int v = (value * radix) + d; 13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (v <= value) { 13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there are too many input digits, at some point 13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the value will go negative, e.g., if we have seen 13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // "0x8000000" already and there is another '0', when 13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // we parse the next 0 the value will go negative. 13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller value = v; 13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (count > 0) { 13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos[0] = p; 13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return value; 13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse a Unicode identifier from the given string at the given 13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * position. Return the identifier, or null if there is no 13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * identifier. 13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param str the string to parse 13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the 13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * first character to examine. It must be less than str.length(), 13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and it must not point to a whitespace character. That is, must 13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * have pos[0] < str.length(). On 13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * OUTPUT, the position after the last parsed character. 13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the Unicode identifier, or null if there is no valid 13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * identifier at pos[0]. 13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String parseUnicodeIdentifier(String str, int[] pos) { 13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(pos[0] < str.length()); 13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buf = new StringBuilder(); 13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int p = pos[0]; 13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (p < str.length()) { 13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ch = Character.codePointAt(str, p); 13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (buf.length() == 0) { 13402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (UCharacter.isUnicodeIdentifierStart(ch)) { 13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.appendCodePoint(ch); 13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return null; 13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (UCharacter.isUnicodeIdentifierPart(ch)) { 13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buf.appendCodePoint(ch); 13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller p += UTF16.getCharCount(ch); 13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos[0] = p; 13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buf.toString(); 13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final char DIGITS[] = { 13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 'U', 'V', 'W', 'X', 'Y', 'Z' 13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller }; 13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append the digits of a positive integer to the given 13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>Appendable</code> in the given radix. This is 13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * done recursively since it is easiest to generate the low- 13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * order digit first, but it must be appended last. 13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param result is the <code>Appendable</code> to append to 13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param n is the positive integer 13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param radix is the radix, from 2 to 36 inclusive 13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param minDigits is the minimum number of digits to append. 13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static <T extends Appendable> void recursiveAppendNumber(T result, int n, 13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int radix, int minDigits) 13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int digit = n % radix; 13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n >= radix || minDigits > 1) { 13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller recursiveAppendNumber(result, n / radix, radix, minDigits - 1); 13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[digit]); 13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalIcuArgumentException(e); 13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append a number to the given Appendable in the given radix. 13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Standard digits '0'-'9' are used and letters 'A'-'Z' for 13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * radices 11 through 36. 13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param result the digits of the number are appended here 13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param n the number to be converted to digits; may be negative. 13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If negative, a '-' is prepended to the digits. 13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param radix a radix from 2 to 36 inclusive. 13992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param minDigits the minimum number of digits, not including 14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * any '-', to produce. Values less than 2 have no effect. One 14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * digit is always emitted regardless of this parameter. 14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a reference to result 14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T extends Appendable> T appendNumber(T result, int n, 14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int radix, int minDigits) 14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (radix < 2 || radix > 36) { 14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("Illegal radix " + radix); 14102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int abs = n; 14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n < 0) { 14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller abs = -n; 14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append("-"); 14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller recursiveAppendNumber(result, abs, radix, minDigits); 14212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result; 14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalIcuArgumentException(e); 14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse an unsigned 31-bit integer at the given offset. Use 14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * UCharacter.digit() to parse individual characters into digits. 14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param text the text to be parsed 14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the 14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * offset within text at which to start parsing; it should point 14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * to a valid digit. On exit, pos[0] is the offset after the last 14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * parsed character. If the parse failed, it will be unchanged on 14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * exit. Must be >= 0 on entry. 14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param radix the radix in which to parse; must be >= 2 and <= 14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 36. 14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return a non-negative parsed number, or -1 upon parse failure. 14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse fails if there are no digits, that is, if pos[0] does not 14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * point to a valid digit on entry, or if the number to be parsed 14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * does not fit into a 31-bit unsigned integer. 14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int parseNumber(String text, int[] pos, int radix) { 14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(pos[0] >= 0); 14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(radix >= 2); 14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // assert(radix <= 36); 14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int n = 0; 14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int p = pos[0]; 14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (p < text.length()) { 14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ch = Character.codePointAt(text, p); 14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int d = UCharacter.digit(ch, radix); 14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (d < 0) { 14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = radix*n + d; 14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // ASSUME that when a 32-bit integer overflows it becomes 14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // negative. E.g., 214748364 * 10 + 8 => negative value. 14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n < 0) { 14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++p; 14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p == pos[0]) { 14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pos[0] = p; 14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return n; 14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Return true if the character is NOT printable ASCII. The tab, 14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * newline and linefeed characters are considered unprintable. 14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static boolean isUnprintable(int c) { 14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //0x20 = 32 and 0x7E = 126 14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return !(c >= 0x20 && c <= 0x7E); 14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Escape unprintable characters using <backslash>uxxxx notation 14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and 14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * above. If the character is printable ASCII, then do nothing 14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and return FALSE. Otherwise, append the escaped notation and 14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * return TRUE. 14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static <T extends Appendable> boolean escapeUnprintable(T result, int c) { 14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (isUnprintable(c)) { 14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('\\'); 14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((c & ~0xFFFF) != 0) { 14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('U'); 14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&(c>>28)]); 14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&(c>>24)]); 14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&(c>>20)]); 14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&(c>>16)]); 14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append('u'); 15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&(c>>12)]); 15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&(c>>8)]); 15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&(c>>4)]); 15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(DIGITS[0xF&c]); 15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return true; 15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (IOException e) { 15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalIcuArgumentException(e); 15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Returns the index of the first character in a set, ignoring quoted text. 15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * For example, in the string "abc'hide'h", the 'h' in "hide" will not be 15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * found by a search for "h". Unlike String.indexOf(), this method searches 15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * not for a single character, but for any character of the string 15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <code>setOfChars</code>. 15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param text text to be searched 15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param start the beginning index, inclusive; <code>0 <= start 15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <= limit</code>. 15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param limit the ending index, exclusive; <code>start <= limit 15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <= text.length()</code>. 15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param setOfChars string with one or more distinct characters 15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return Offset of the first character in <code>setOfChars</code> 15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * found, or -1 if not found. 15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @see String#indexOf 15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static int quotedIndexOf(String text, int start, int limit, 15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String setOfChars) { 15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=start; i<limit; ++i) { 15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char c = text.charAt(i); 15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == BACKSLASH) { 15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++i; 15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (c == APOSTROPHE) { 15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (++i < limit 15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && text.charAt(i) != APOSTROPHE) {} 15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (setOfChars.indexOf(c) >= 0) { 15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return i; 15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append a character to a rule that is being built up. To flush 15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the quoteBuf to rule, make one final call with isLiteral == true. 15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * If there is no final character, pass in (int)-1 as c. 15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param rule the string to append the character to 15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param c the character to append, or (int)-1 if none. 15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param isLiteral if true, then the given character should not be 15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * quoted or escaped. Usually this means it is a syntactic element 15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * such as > or $ 15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param escapeUnprintable if true, then unprintable characters 15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * should be escaped using escapeUnprintable(). These escapes will 15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * appear outside of quotes. 15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param quoteBuf a buffer which is used to build up quoted 15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * substrings. The caller should initially supply an empty buffer, 15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * and thereafter should not modify the buffer. The buffer should be 15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * cleared out by, at the end, calling this method with a literal 15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * character (which may be -1). 15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void appendToRule(StringBuffer rule, 15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c, 15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isLiteral, 15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean escapeUnprintable, 15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuffer quoteBuf) { 15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If we are escaping unprintables, then escape them outside 15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // quotes. \\u and \\U are not recognized within quotes. The same 15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // logic applies to literals, but literals are never escaped. 15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (isLiteral || 15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (escapeUnprintable && Utility.isUnprintable(c))) { 15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (quoteBuf.length() > 0) { 15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We prefer backslash APOSTROPHE to double APOSTROPHE 15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (more readable, less similar to ") so if there are 15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // double APOSTROPHEs at the ends, we pull them outside 15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // of the quote. 15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the first thing in the quoteBuf is APOSTROPHE 15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (doubled) then pull it out. 15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (quoteBuf.length() >= 2 && 15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.charAt(0) == APOSTROPHE && 15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.charAt(1) == APOSTROPHE) { 15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.append(BACKSLASH).append(APOSTROPHE); 15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.delete(0, 2); 15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the last thing in the quoteBuf is APOSTROPHE 15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (doubled) then remove and count it and add it after. 15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int trailingCount = 0; 15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (quoteBuf.length() >= 2 && 15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE && 15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) { 15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.setLength(quoteBuf.length()-2); 15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++trailingCount; 15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (quoteBuf.length() > 0) { 15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.append(APOSTROPHE); 15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.append(quoteBuf); 15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.append(APOSTROPHE); 16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.setLength(0); 16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (trailingCount-- > 0) { 16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.append(BACKSLASH).append(APOSTROPHE); 16042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c != -1) { 16072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /* Since spaces are ignored during parsing, they are 16082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * emitted only for readability. We emit one here 16092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * only if there isn't already one at the end of the 16102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * rule. 16112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == ' ') { 16132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int len = rule.length(); 16142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (len > 0 && rule.charAt(len-1) != ' ') { 16152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.append(' '); 16162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) { 16182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.appendCodePoint(c); 16192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Escape ' and '\' and don't begin a quote just for them 16242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (quoteBuf.length() == 0 && 16252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (c == APOSTROPHE || c == BACKSLASH)) { 16262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.append(BACKSLASH).append((char)c); 16272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Specials (printable ascii that isn't [0-9a-zA-Z]) and 16302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // whitespace need quoting. Also append stuff to quotes if we are 16312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // building up a quoted substring already. 16322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (quoteBuf.length() > 0 || 16332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (c >= 0x0021 && c <= 0x007E && 16342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || 16352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || 16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || 16372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller PatternProps.isWhiteSpace(c)) { 16382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.appendCodePoint(c); 16392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Double ' within a quote 16402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c == APOSTROPHE) { 16412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller quoteBuf.append((char)c); 16422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Otherwise just append 16462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else { 16472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller rule.appendCodePoint(c); 16482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 16522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Append the given string to the rule. Calls the single-character 16532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * version of appendToRule for each character. 16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void appendToRule(StringBuffer rule, 16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String text, 16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isLiteral, 16582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean escapeUnprintable, 16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuffer quoteBuf) { 16602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i=0; i<text.length(); ++i) { 16612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Okay to process in 16-bit code units here 16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf); 16632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 16672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Given a matcher reference, which may be null, append its 16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * pattern as a literal to the given rule. 16692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static void appendToRule(StringBuffer rule, 16712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeMatcher matcher, 16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean escapeUnprintable, 16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuffer quoteBuf) { 16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (matcher != null) { 16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller appendToRule(rule, matcher.toPattern(escapeUnprintable), 16762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller true, escapeUnprintable, quoteBuf); 16772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 16802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 16812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Compares 2 unsigned integers 16822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param source 32 bit unsigned integer 16832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param target 32 bit unsigned integer 16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return 0 if equals, 1 if source is greater than target and -1 16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * otherwise 16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final int compareUnsigned(int source, int target) 16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller source += MAGIC_UNSIGNED; 16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller target += MAGIC_UNSIGNED; 16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (source < target) { 16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 1693f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller else if (source > target) { 16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 1; 16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Find the highest bit in a positive integer. This is done 17022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * by doing a binary search through the bits. 17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param n is the integer 17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * 17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return the bit number of the highest bit, with 0 being 17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * the low order bit, or -1 if <code>n</code> is not positive 17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static final byte highBit(int n) 17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n <= 0) { 17122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller byte bit = 0; 17162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n >= 1 << 16) { 17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n >>= 16; 17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bit += 16; 17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n >= 1 << 8) { 17232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n >>= 8; 17242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bit += 8; 17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n >= 1 << 4) { 17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n >>= 4; 17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bit += 4; 17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n >= 1 << 2) { 17332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n >>= 2; 17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bit += 2; 17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n >= 1 << 1) { 17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n >>= 1; 17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bit += 1; 17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return bit; 17432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility method to take a int[] containing codepoints and return 1746f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * a string representation with code units. 17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String valueOf(int[]source){ 17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: Investigate why this method is not on UTF16 class 17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder result = new StringBuilder(source.length); 17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for(int i=0; i<source.length; i++){ 17522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.appendCodePoint(source[i]); 17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result.toString(); 17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Utility to duplicate a string count times 17602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param s String to be duplicated. 17612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param count Number of times to duplicate a string. 17622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String repeat(String s, int count) { 17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (count <= 0) return ""; 17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (count == 1) return s; 17662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder result = new StringBuilder(); 17672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < count; ++i) { 17682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller result.append(s); 17692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return result.toString(); 17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1772f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String[] splitString(String src, String target) { 17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return src.split("\\Q" + target + "\\E"); 17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Split the string at runs of ascii whitespace characters. 17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String[] splitWhitespace(String src) { 17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return src.split("\\s+"); 17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse a list of hex numbers and return a string 17862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param string String of hex numbers. 17872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param minLength Minimal length. 17882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param separator Separator. 17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return A string from hex numbers. 17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 17912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String fromHex(String string, int minLength, String separator) { 17922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+")); 17932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1794f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 17952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller /** 17962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Parse a list of hex numbers and return a string 17972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param string String of hex numbers. 17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param minLength Minimal length. 17992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @param separator Separator. 18002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @return A string from hex numbers. 18012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 18022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public static String fromHex(String string, int minLength, Pattern separator) { 18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder buffer = new StringBuilder(); 18042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String[] parts = separator.split(string); 18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (String part : parts) { 18062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (part.length() < minLength) { 18072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw new IllegalArgumentException("code point too short: " + part); 18082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int cp = Integer.parseInt(part, 16); 18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller buffer.appendCodePoint(cp); 18112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return buffer.toString(); 18132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 18142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 1815