1579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson/* 2579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Copyright (C) 2007 The Android Open Source Project 3579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 4579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Licensed under the Apache License, Version 2.0 (the "License"); 5579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * you may not use this file except in compliance with the License. 6579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * You may obtain a copy of the License at 7579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 8579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * http://www.apache.org/licenses/LICENSE-2.0 9579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 10579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Unless required by applicable law or agreed to in writing, software 11579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * distributed under the License is distributed on an "AS IS" BASIS, 12579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * See the License for the specific language governing permissions and 14579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * limitations under the License. 15579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 16579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 17579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonpackage com.android.dx.rop.cst; 18579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 19579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonimport com.android.dx.rop.type.Type; 20579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonimport com.android.dx.util.ByteArray; 21579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonimport com.android.dx.util.Hex; 22579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 23579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson/** 24579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Constants of type {@code CONSTANT_Utf8_info} or {@code CONSTANT_String_info}. 25579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 26579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonpublic final class CstString extends TypedConstant { 27579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 28579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * {@code non-null;} instance representing {@code ""}, that is, the 29579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * empty string 30579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 31579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public static final CstString EMPTY_STRING = new CstString(""); 32579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 33579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@code non-null;} the UTF-8 value as a string */ 34579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson private final String string; 35579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 36579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@code non-null;} the UTF-8 value as bytes */ 37579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson private final ByteArray bytes; 38579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 39579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 40579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Converts a string into its MUTF-8 form. MUTF-8 differs from normal UTF-8 41579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * in the handling of character '\0' and surrogate pairs. 42579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 43579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @param string {@code non-null;} the string to convert 44579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code non-null;} the UTF-8 bytes for it 45579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 46579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public static byte[] stringToUtf8Bytes(String string) { 47579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int len = string.length(); 48579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson byte[] bytes = new byte[len * 3]; // Avoid having to reallocate. 49579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int outAt = 0; 50579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 51579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson for (int i = 0; i < len; i++) { 52579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson char c = string.charAt(i); 53579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((c != 0) && (c < 0x80)) { 54579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson bytes[outAt] = (byte) c; 55579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson outAt++; 56579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } else if (c < 0x800) { 57579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0); 58579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80); 59579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson outAt += 2; 60579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } else { 61579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0); 62579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80); 63579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80); 64579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson outAt += 3; 65579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 66579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 67579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 68579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson byte[] result = new byte[outAt]; 69579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson System.arraycopy(bytes, 0, result, 0, outAt); 70579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return result; 71579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 72579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 73579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 74579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Converts an array of UTF-8 bytes into a string. 75579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 76579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @param bytes {@code non-null;} the bytes to convert 77579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code non-null;} the converted string 78579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 79579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public static String utf8BytesToString(ByteArray bytes) { 80579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int length = bytes.size(); 81579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson char[] chars = new char[length]; // This is sized to avoid a realloc. 82579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int outAt = 0; 83579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 84579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson for (int at = 0; length > 0; /*at*/) { 85579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int v0 = bytes.getUnsignedByte(at); 86579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson char out; 87579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson switch (v0 >> 4) { 88579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson case 0x00: case 0x01: case 0x02: case 0x03: 89579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson case 0x04: case 0x05: case 0x06: case 0x07: { 90579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson // 0XXXXXXX -- single-byte encoding 91579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson length--; 92579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (v0 == 0) { 93579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson // A single zero byte is illegal. 94579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v0, at); 95579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 96579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson out = (char) v0; 97579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson at++; 98579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson break; 99579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 100579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson case 0x0c: case 0x0d: { 101579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson // 110XXXXX -- two-byte encoding 102579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson length -= 2; 103579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (length < 0) { 104579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v0, at); 105579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 106579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int v1 = bytes.getUnsignedByte(at + 1); 107579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((v1 & 0xc0) != 0x80) { 108579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v1, at + 1); 109579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 110579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f); 111579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((value != 0) && (value < 0x80)) { 112579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /* 113579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * This should have been represented with 114579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * one-byte encoding. 115579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 116579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v1, at + 1); 117579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 118579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson out = (char) value; 119579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson at += 2; 120579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson break; 121579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 122579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson case 0x0e: { 123579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson // 1110XXXX -- three-byte encoding 124579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson length -= 3; 125579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (length < 0) { 126579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v0, at); 127579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 128579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int v1 = bytes.getUnsignedByte(at + 1); 129579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((v1 & 0xc0) != 0x80) { 130579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v1, at + 1); 131579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 132579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int v2 = bytes.getUnsignedByte(at + 2); 133579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((v1 & 0xc0) != 0x80) { 134579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v2, at + 2); 135579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 136579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) | 137579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson (v2 & 0x3f); 138579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (value < 0x800) { 139579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /* 140579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * This should have been represented with one- or 141579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * two-byte encoding. 142579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 143579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v2, at + 2); 144579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 145579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson out = (char) value; 146579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson at += 3; 147579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson break; 148579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 149579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson default: { 150579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson // 10XXXXXX, 1111XXXX -- illegal 151579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return throwBadUtf8(v0, at); 152579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 153579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 154579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson chars[outAt] = out; 155579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson outAt++; 156579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 157579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 158579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return new String(chars, 0, outAt); 159579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 160579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 161579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 162579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Helper for {@link #utf8BytesToString}, which throws the right 163579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * exception for a bogus utf-8 byte. 164579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 165579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @param value the byte value 166579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @param offset the file offset 167579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return never 168579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @throws IllegalArgumentException always thrown 169579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 170579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson private static String throwBadUtf8(int value, int offset) { 171579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) + 172579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson " at offset " + Hex.u4(offset)); 173579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 174579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 175579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 176579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Constructs an instance from a {@code String}. 177579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 178579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @param string {@code non-null;} the UTF-8 value as a string 179579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 180579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public CstString(String string) { 181579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (string == null) { 182579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson throw new NullPointerException("string == null"); 183579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 184579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 185579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson this.string = string.intern(); 186579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson this.bytes = new ByteArray(stringToUtf8Bytes(string)); 187579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 188579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 189579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 190579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Constructs an instance from some UTF-8 bytes. 191579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 192579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @param bytes {@code non-null;} array of the UTF-8 bytes 193579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 194579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public CstString(ByteArray bytes) { 195579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (bytes == null) { 196579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson throw new NullPointerException("bytes == null"); 197579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 198579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 199579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson this.bytes = bytes; 200579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson this.string = utf8BytesToString(bytes).intern(); 201579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 202579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 203579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@inheritDoc} */ 204579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson @Override 205579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public boolean equals(Object other) { 206579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (!(other instanceof CstString)) { 207579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return false; 208579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 209579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 210579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return string.equals(((CstString) other).string); 211579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 212579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 213579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@inheritDoc} */ 214579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson @Override 215579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public int hashCode() { 216579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return string.hashCode(); 217579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 218579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 219579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@inheritDoc} */ 220579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson @Override 221579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson protected int compareTo0(Constant other) { 222579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return string.compareTo(((CstString) other).string); 223579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 224579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 225579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@inheritDoc} */ 226579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson @Override 227579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public String toString() { 228579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return "string{\"" + toHuman() + "\"}"; 229579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 230579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 231579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@inheritDoc} */ 232579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson @Override 233579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public String typeName() { 234579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return "utf8"; 235579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 236579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 237579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@inheritDoc} */ 238579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson @Override 239579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public boolean isCategory2() { 240579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return false; 241579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 242579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 243579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** {@inheritDoc} */ 244579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public String toHuman() { 245579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int len = string.length(); 246579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson StringBuilder sb = new StringBuilder(len * 3 / 2); 247579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 248579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson for (int i = 0; i < len; i++) { 249579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson char c = string.charAt(i); 250579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((c >= ' ') && (c < 0x7f)) { 251579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((c == '\'') || (c == '\"') || (c == '\\')) { 252579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append('\\'); 253579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 254579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append(c); 255579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } else if (c <= 0x7f) { 256579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson switch (c) { 257579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson case '\n': sb.append("\\n"); break; 258579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson case '\r': sb.append("\\r"); break; 259579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson case '\t': sb.append("\\t"); break; 260579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson default: { 261579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /* 262579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Represent the character as an octal escape. 263579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * If the next character is a valid octal 264579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * digit, disambiguate by using the 265579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * three-digit form. 266579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 267579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson char nextChar = 268579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson (i < (len - 1)) ? string.charAt(i + 1) : 0; 269579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson boolean displayZero = 270579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson (nextChar >= '0') && (nextChar <= '7'); 271579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append('\\'); 272579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson for (int shift = 6; shift >= 0; shift -= 3) { 273579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson char outChar = (char) (((c >> shift) & 7) + '0'); 274579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if ((outChar != '0') || displayZero) { 275579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append(outChar); 276579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson displayZero = true; 277579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 278579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 279579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (! displayZero) { 280579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson // Ironic edge case: The original value was 0. 281579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append('0'); 282579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 283579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson break; 284579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 285579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 286579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } else { 287579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append("\\u"); 288579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append(Character.forDigit(c >> 12, 16)); 289579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append(Character.forDigit((c >> 8) & 0x0f, 16)); 290579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append(Character.forDigit((c >> 4) & 0x0f, 16)); 291579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson sb.append(Character.forDigit(c & 0x0f, 16)); 292579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 293579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 294579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 295579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return sb.toString(); 296579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 297579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 298579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 299579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Gets the value as a human-oriented string, surrounded by double 300579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * quotes. 301579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 302579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code non-null;} the quoted string 303579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 304579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public String toQuoted() { 305579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return '\"' + toHuman() + '\"'; 306579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 307579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 308579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 309579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Gets the value as a human-oriented string, surrounded by double 310579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * quotes, but ellipsizes the result if it is longer than the given 311579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * maximum length 312579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 313579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @param maxLength {@code >= 5;} the maximum length of the string to return 314579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code non-null;} the quoted string 315579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 316579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public String toQuoted(int maxLength) { 317579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson String string = toHuman(); 318579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson int length = string.length(); 319579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson String ellipses; 320579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 321579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson if (length <= (maxLength - 2)) { 322579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson ellipses = ""; 323579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } else { 324579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson string = string.substring(0, maxLength - 5); 325579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson ellipses = "..."; 326579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 327579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 328579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return '\"' + string + ellipses + '\"'; 329579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 330579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 331579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 332579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Gets the UTF-8 value as a string. 333579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * The returned string is always already interned. 334579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 335579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code non-null;} the UTF-8 value as a string 336579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 337579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public String getString() { 338579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return string; 339579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 340579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 341579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 342579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Gets the UTF-8 value as UTF-8 encoded bytes. 343579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 344579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code non-null;} an array of the UTF-8 bytes 345579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 346579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public ByteArray getBytes() { 347579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return bytes; 348579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 349579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 350579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 351579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Gets the size of this instance as UTF-8 code points. That is, 352579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * get the number of bytes in the UTF-8 encoding of this instance. 353579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 354579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code >= 0;} the UTF-8 size 355579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 356579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public int getUtf8Size() { 357579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return bytes.size(); 358579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 359579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 360579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson /** 361579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Gets the size of this instance as UTF-16 code points. That is, 362579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * get the number of 16-bit chars in the UTF-16 encoding of this 363579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * instance. This is the same as the {@code length} of the 364579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Java {@code String} representation of this instance. 365579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * 366579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * @return {@code >= 0;} the UTF-16 size 367579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */ 368579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public int getUtf16Size() { 369579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return string.length(); 370579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 371579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson 372579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson public Type getType() { 373579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson return Type.STRING; 374579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson } 375579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson} 376