/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
17579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonpackage com.android.dx.rop.cst;
18579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
19579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonimport com.android.dx.rop.type.Type;
20579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonimport com.android.dx.util.ByteArray;
21579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonimport com.android.dx.util.Hex;
22579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
23579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson/**
24579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson * Constants of type {@code CONSTANT_Utf8_info} or {@code CONSTANT_String_info}.
25579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson */
26579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilsonpublic final class CstString extends TypedConstant {
27579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
28579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * {@code non-null;} instance representing {@code ""}, that is, the
29579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * empty string
30579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
31579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public static final CstString EMPTY_STRING = new CstString("");
32579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
33579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@code non-null;} the UTF-8 value as a string */
34579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    private final String string;
35579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
36579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@code non-null;} the UTF-8 value as bytes */
37579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    private final ByteArray bytes;
38579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
39579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
40579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Converts a string into its MUTF-8 form. MUTF-8 differs from normal UTF-8
41579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * in the handling of character '\0' and surrogate pairs.
42579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
43579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @param string {@code non-null;} the string to convert
44579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code non-null;} the UTF-8 bytes for it
45579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
46579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public static byte[] stringToUtf8Bytes(String string) {
47579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        int len = string.length();
48579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        byte[] bytes = new byte[len * 3]; // Avoid having to reallocate.
49579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        int outAt = 0;
50579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
51579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        for (int i = 0; i < len; i++) {
52579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            char c = string.charAt(i);
53579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            if ((c != 0) && (c < 0x80)) {
54579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                bytes[outAt] = (byte) c;
55579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                outAt++;
56579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            } else if (c < 0x800) {
57579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0);
58579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80);
59579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                outAt += 2;
60579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            } else {
61579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0);
62579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80);
63579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80);
64579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                outAt += 3;
65579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            }
66579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        }
67579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
68579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        byte[] result = new byte[outAt];
69579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        System.arraycopy(bytes, 0, result, 0, outAt);
70579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return result;
71579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
72579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
73579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
74579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Converts an array of UTF-8 bytes into a string.
75579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
76579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @param bytes {@code non-null;} the bytes to convert
77579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code non-null;} the converted string
78579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
79579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public static String utf8BytesToString(ByteArray bytes) {
80579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        int length = bytes.size();
81579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        char[] chars = new char[length]; // This is sized to avoid a realloc.
82579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        int outAt = 0;
83579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
84579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        for (int at = 0; length > 0; /*at*/) {
85579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            int v0 = bytes.getUnsignedByte(at);
86579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            char out;
87579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            switch (v0 >> 4) {
88579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                case 0x00: case 0x01: case 0x02: case 0x03:
89579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                case 0x04: case 0x05: case 0x06: case 0x07: {
90579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    // 0XXXXXXX -- single-byte encoding
91579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    length--;
92579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if (v0 == 0) {
93579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        // A single zero byte is illegal.
94579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v0, at);
95579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
96579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    out = (char) v0;
97579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    at++;
98579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    break;
99579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                }
100579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                case 0x0c: case 0x0d: {
101579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    // 110XXXXX -- two-byte encoding
102579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    length -= 2;
103579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if (length < 0) {
104579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v0, at);
105579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
106579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    int v1 = bytes.getUnsignedByte(at + 1);
107579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if ((v1 & 0xc0) != 0x80) {
108579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v1, at + 1);
109579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
110579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
111579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if ((value != 0) && (value < 0x80)) {
112579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        /*
113579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * This should have been represented with
114579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * one-byte encoding.
115579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         */
116579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v1, at + 1);
117579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
118579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    out = (char) value;
119579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    at += 2;
120579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    break;
121579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                }
122579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                case 0x0e: {
123579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    // 1110XXXX -- three-byte encoding
124579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    length -= 3;
125579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if (length < 0) {
126579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v0, at);
127579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
128579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    int v1 = bytes.getUnsignedByte(at + 1);
129579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if ((v1 & 0xc0) != 0x80) {
130579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v1, at + 1);
131579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
132579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    int v2 = bytes.getUnsignedByte(at + 2);
133579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if ((v1 & 0xc0) != 0x80) {
134579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v2, at + 2);
135579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
136579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
137579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        (v2 & 0x3f);
138579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    if (value < 0x800) {
139579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        /*
140579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * This should have been represented with one- or
141579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * two-byte encoding.
142579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         */
143579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        return throwBadUtf8(v2, at + 2);
144579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
145579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    out = (char) value;
146579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    at += 3;
147579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    break;
148579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                }
149579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                default: {
150579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    // 10XXXXXX, 1111XXXX -- illegal
151579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    return throwBadUtf8(v0, at);
152579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                }
153579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            }
154579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            chars[outAt] = out;
155579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            outAt++;
156579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        }
157579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
158579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return new String(chars, 0, outAt);
159579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
160579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
161579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
162579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Helper for {@link #utf8BytesToString}, which throws the right
163579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * exception for a bogus utf-8 byte.
164579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
165579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @param value the byte value
166579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @param offset the file offset
167579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return never
168579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @throws IllegalArgumentException always thrown
169579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
170579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    private static String throwBadUtf8(int value, int offset) {
171579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
172579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                                           " at offset " + Hex.u4(offset));
173579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
174579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
175579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
176579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Constructs an instance from a {@code String}.
177579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
178579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @param string {@code non-null;} the UTF-8 value as a string
179579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
180579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public CstString(String string) {
181579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        if (string == null) {
182579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            throw new NullPointerException("string == null");
183579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        }
184579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
185579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        this.string = string.intern();
186579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        this.bytes = new ByteArray(stringToUtf8Bytes(string));
187579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
188579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
189579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
190579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Constructs an instance from some UTF-8 bytes.
191579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
192579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @param bytes {@code non-null;} array of the UTF-8 bytes
193579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
194579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public CstString(ByteArray bytes) {
195579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        if (bytes == null) {
196579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            throw new NullPointerException("bytes == null");
197579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        }
198579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
199579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        this.bytes = bytes;
200579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        this.string = utf8BytesToString(bytes).intern();
201579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
202579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
203579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@inheritDoc} */
204579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    @Override
205579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public boolean equals(Object other) {
206579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        if (!(other instanceof CstString)) {
207579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            return false;
208579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        }
209579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
210579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return string.equals(((CstString) other).string);
211579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
212579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
213579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@inheritDoc} */
214579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    @Override
215579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public int hashCode() {
216579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return string.hashCode();
217579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
218579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
219579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@inheritDoc} */
220579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    @Override
221579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    protected int compareTo0(Constant other) {
222579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return string.compareTo(((CstString) other).string);
223579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
224579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
225579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@inheritDoc} */
226579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    @Override
227579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public String toString() {
228579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return "string{\"" + toHuman() + "\"}";
229579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
230579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
231579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@inheritDoc} */
232579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    @Override
233579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public String typeName() {
234579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return "utf8";
235579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
236579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
237579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@inheritDoc} */
238579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    @Override
239579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public boolean isCategory2() {
240579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return false;
241579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
242579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
243579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /** {@inheritDoc} */
244579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public String toHuman() {
245579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        int len = string.length();
246579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        StringBuilder sb = new StringBuilder(len * 3 / 2);
247579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
248579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        for (int i = 0; i < len; i++) {
249579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            char c = string.charAt(i);
250579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            if ((c >= ' ') && (c < 0x7f)) {
251579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                if ((c == '\'') || (c == '\"') || (c == '\\')) {
252579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    sb.append('\\');
253579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                }
254579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                sb.append(c);
255579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            } else if (c <= 0x7f) {
256579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                switch (c) {
257579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    case '\n': sb.append("\\n"); break;
258579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    case '\r': sb.append("\\r"); break;
259579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    case '\t': sb.append("\\t"); break;
260579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    default: {
261579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        /*
262579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * Represent the character as an octal escape.
263579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * If the next character is a valid octal
264579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * digit, disambiguate by using the
265579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         * three-digit form.
266579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                         */
267579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        char nextChar =
268579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                            (i < (len - 1)) ? string.charAt(i + 1) : 0;
269579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        boolean displayZero =
270579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                            (nextChar >= '0') && (nextChar <= '7');
271579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        sb.append('\\');
272579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        for (int shift = 6; shift >= 0; shift -= 3) {
273579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                            char outChar = (char) (((c >> shift) & 7) + '0');
274579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                            if ((outChar != '0') || displayZero) {
275579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                                sb.append(outChar);
276579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                                displayZero = true;
277579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                            }
278579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        }
279579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        if (! displayZero) {
280579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                            // Ironic edge case: The original value was 0.
281579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                            sb.append('0');
282579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        }
283579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                        break;
284579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                    }
285579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                }
286579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            } else {
287579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                sb.append("\\u");
288579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                sb.append(Character.forDigit(c >> 12, 16));
289579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
290579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
291579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson                sb.append(Character.forDigit(c & 0x0f, 16));
292579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            }
293579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        }
294579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
295579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return sb.toString();
296579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
297579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
298579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
299579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Gets the value as a human-oriented string, surrounded by double
300579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * quotes.
301579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
302579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code non-null;} the quoted string
303579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
304579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public String toQuoted() {
305579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return '\"' + toHuman() + '\"';
306579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
307579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
308579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
309579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Gets the value as a human-oriented string, surrounded by double
310579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * quotes, but ellipsizes the result if it is longer than the given
311579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * maximum length
312579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
313579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @param maxLength {@code >= 5;} the maximum length of the string to return
314579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code non-null;} the quoted string
315579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
316579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public String toQuoted(int maxLength) {
317579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        String string = toHuman();
318579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        int length = string.length();
319579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        String ellipses;
320579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
321579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        if (length <= (maxLength - 2)) {
322579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            ellipses = "";
323579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        } else {
324579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            string = string.substring(0, maxLength - 5);
325579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson            ellipses = "...";
326579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        }
327579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
328579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return '\"' + string + ellipses + '\"';
329579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
330579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
331579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
332579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Gets the UTF-8 value as a string.
333579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * The returned string is always already interned.
334579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
335579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code non-null;} the UTF-8 value as a string
336579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
337579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public String getString() {
338579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return string;
339579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
340579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
341579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
342579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Gets the UTF-8 value as UTF-8 encoded bytes.
343579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
344579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code non-null;} an array of the UTF-8 bytes
345579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
346579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public ByteArray getBytes() {
347579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return bytes;
348579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
349579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
350579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
351579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Gets the size of this instance as UTF-8 code points. That is,
352579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * get the number of bytes in the UTF-8 encoding of this instance.
353579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
354579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code >= 0;} the UTF-8 size
355579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
356579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public int getUtf8Size() {
357579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return bytes.size();
358579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
359579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
360579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    /**
361579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Gets the size of this instance as UTF-16 code points. That is,
362579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * get the number of 16-bit chars in the UTF-16 encoding of this
363579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * instance. This is the same as the {@code length} of the
364579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * Java {@code String} representation of this instance.
365579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     *
366579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     * @return {@code >= 0;} the UTF-16 size
367579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson     */
368579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public int getUtf16Size() {
369579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return string.length();
370579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
371579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson
372579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    public Type getType() {
373579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson        return Type.STRING;
374579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson    }
375579d7739c53a2707ad711a2d2cae46d7d782f06Jesse Wilson}
376