/*
 * ProGuard -- shrinking, optimization, obfuscation, and preverification
 *             of Java bytecode.
 *
 * Copyright (c) 2002-2009 Eric Lafortune (eric@graphics.cornell.edu)
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
21b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onoratopackage proguard.classfile.constant;
22b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
23b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onoratoimport proguard.classfile.*;
24b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onoratoimport proguard.classfile.constant.visitor.ConstantVisitor;
25b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
26b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onoratoimport java.io.UnsupportedEncodingException;
27b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
28b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato/**
29b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato * This Constant represents a UTF-8 constant in the constant pool.
30b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato *
31b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato * @author Eric Lafortune
32b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato */
33b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onoratopublic class Utf8Constant extends Constant
34b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato{
35b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final char TWO_BYTE_LIMIT     = 0x80;
36b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  TWO_BYTE_CONSTANT1 = 0xc0;
37b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  TWO_BYTE_CONSTANT2 = 0x80;
38b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  TWO_BYTE_SHIFT1    = 6;
39b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  TWO_BYTE_MASK1     = 0x1f;
40b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  TWO_BYTE_MASK2     = 0x3f;
41b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
42b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final char THREE_BYTE_LIMIT     = 0x800;
43b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_CONSTANT1 = 0xe0;
44b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_CONSTANT2 = 0x80;
45b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_CONSTANT3 = 0x80;
46b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_SHIFT1    = 12;
47b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_SHIFT2    = 6;
48b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_MASK1     = 0x0f;
49b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_MASK2     = 0x3f;
50b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private static final int  THREE_BYTE_MASK3     = 0x3f;
51b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
52b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
53b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    // There are a lot of Utf8Constant objects, so we're optimising their storage.
54b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    // Initially, we're storing the UTF-8 bytes in a byte array.
55b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    // When the corresponding String is requested, we ditch the array and just
56b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    // store the String.
57b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
58b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    //private int u2length;
59b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private byte[] bytes;
60b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
61b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private String string;
62b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
63b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
64b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
65b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Creates an uninitialized Utf8Constant.
66b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     *
67b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
68b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public Utf8Constant()
69b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
70b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
71b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
72b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
73b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
74b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Creates a Utf8Constant containing the given string.
75b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
76b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public Utf8Constant(String string)
77b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
78b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        this.bytes  = null;
79b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        this.string = string;
80b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
81b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
82b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
83b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
84b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Initializes the UTF-8 data with an array of bytes.
85b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
86b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public void setBytes(byte[] bytes)
87b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
88b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        this.bytes  = bytes;
89b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        this.string = null;
90b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
91b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
92b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
93b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
94b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Returns the UTF-8 data as an array of bytes.
95b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
96b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public byte[] getBytes()
97b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
98b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        try
99b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
100b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            switchToByteArrayRepresentation();
101b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
102b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        catch (UnsupportedEncodingException ex)
103b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
104b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            throw new RuntimeException(ex.getMessage());
105b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
106b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
107b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        return bytes;
108b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
109b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
110b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
111b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
112b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Initializes the UTF-8 data with a String.
113b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
114b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public void setString(String utf8String)
115b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
116b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        this.bytes  = null;
117b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        this.string = utf8String;
118b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
119b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
120b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
121b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
122b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Returns the UTF-8 data as a String.
123b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
124b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public String getString()
125b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
126b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        try
127b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
128b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            switchToStringRepresentation();
129b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
130b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        catch (UnsupportedEncodingException ex)
131b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
132b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            throw new RuntimeException(ex.getMessage());
133b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
134b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
135b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        return string;
136b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
137b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
138b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
139b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    // Implementations for Constant.
140b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
141b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public int getTag()
142b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
143b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        return ClassConstants.CONSTANT_Utf8;
144b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
145b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
146b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    public void accept(Clazz clazz, ConstantVisitor constantVisitor)
147b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
148b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        constantVisitor.visitUtf8Constant(clazz, this);
149b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
150b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
151b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
152b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    // Small utility methods.
153b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
154b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
155b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Switches to a byte array representation of the UTF-8 data.
156b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
157b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private void switchToByteArrayRepresentation() throws UnsupportedEncodingException
158b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
159b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        if (bytes == null)
160b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
161b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            bytes  = getByteArrayRepresentation(string);
162b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            string = null;
163b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
164b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
165b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
166b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
167b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
168b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Switches to a String representation of the UTF-8 data.
169b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
170b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private void switchToStringRepresentation() throws UnsupportedEncodingException
171b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
172b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        if (string == null)
173b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
174b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            string = getStringRepresentation(bytes);
175b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            bytes  = null;
176b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
177b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
178b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
179b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
180b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
181b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Returns the modified UTF-8 byte array representation of the given string.
182b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
183b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private byte[] getByteArrayRepresentation(String string) throws UnsupportedEncodingException
184b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
185b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // We're computing the byte array ourselves, because the implementation
186b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // of String.getBytes("UTF-8") has a bug, at least up to JRE 1.4.2.
187b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // Also note the special treatment of the 0 character.
188b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
189b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // Compute the byte array length.
190b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        int byteLength   = 0;
191b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        int stringLength = string.length();
192b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
193b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
194b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            char c = string.charAt(stringIndex);
195b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
196b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            // The character is represented by one, two, or three bytes.
197b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            byteLength += c == 0                ? 2 :
198b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                          c <  TWO_BYTE_LIMIT   ? 1 :
199b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                          c <  THREE_BYTE_LIMIT ? 2 :
200b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                                                  3;
201b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
202b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
203b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // Allocate the byte array with the computed length.
204b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        byte[] bytes  = new byte[byteLength];
205b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
206b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // Fill out the array.
207b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        int byteIndex = 0;
208b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
209b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
210b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            char c = string.charAt(stringIndex);
211b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            if (c == 0)
212b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            {
213b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                // The 0 character gets a two-byte representation in classes.
214b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT1;
215b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT2;
216b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            }
217b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            else if (c < TWO_BYTE_LIMIT)
218b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            {
219b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                // The character is represented by a single byte.
220b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)c;
221b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            }
222b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            else if (c < THREE_BYTE_LIMIT)
223b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            {
224b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                // The character is represented by two bytes.
225b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT1 | ((c >>> TWO_BYTE_SHIFT1) & TWO_BYTE_MASK1));
226b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT2 | ( c                      & TWO_BYTE_MASK2));
227b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            }
228b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            else
229b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            {
230b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                // The character is represented by three bytes.
231b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT1 | ((c >>> THREE_BYTE_SHIFT1) & THREE_BYTE_MASK1));
232b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT2 | ((c >>> THREE_BYTE_SHIFT2) & THREE_BYTE_MASK2));
233b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT3 | ( c                        & THREE_BYTE_MASK3));
234b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            }
235b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
236b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
237b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        return bytes;
238b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
239b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
240b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
241b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    /**
242b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     * Returns the String representation of the given modified UTF-8 byte array.
243b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato     */
244b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException
245b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    {
246b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // We're computing the string ourselves, because the implementation
247b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // of "new String(bytes)" doesn't honor the special treatment of
248b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // the 0 character in JRE 1.6_u11.
249b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
250b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // Allocate the byte array with the computed length.
251b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        char[] chars  = new char[bytes.length];
252b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
253b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        // Fill out the array.
254b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        int charIndex = 0;
255b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        int byteIndex = 0;
256b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        while (byteIndex < bytes.length)
257b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        {
258b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
259b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            int b = bytes[byteIndex++] & 0xff;
260b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
261b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            // Depending on the flag bits in the first byte, the character
262b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            // is represented by a single byte, by two bytes, or by three
263b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            // bytes. We're not checking the redundant flag bits in the
264b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            // second byte and the third byte.
265b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            try
266b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            {
267b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                chars[charIndex++] =
268b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                    (char)(b < TWO_BYTE_CONSTANT1   ? b                                                          :
269b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
270b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                           b < THREE_BYTE_CONSTANT1 ? ((b                  & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) |
271b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                                                      ((bytes[byteIndex++] & TWO_BYTE_MASK2)                   ) :
272b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
273b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                                                      ((b                  & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) |
274b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                                                      ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) |
275b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                                                      ((bytes[byteIndex++] & THREE_BYTE_MASK3)                     ));
276b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            }
277b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            catch (ArrayIndexOutOfBoundsException e)
278b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            {
279b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato                throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]");
280b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato            }
281b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        }
282b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato
283b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato        return new String(chars, 0, charIndex);
284b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato    }
285b72c5c2e5482cf10117b2b25f642f7616b2326c3Joe Onorato}
286