/*
 * ProGuard -- shrinking, optimization, obfuscation, and preverification
 *             of Java bytecode.
 *
 * Copyright (c) 2002-2009 Eric Lafortune (eric@graphics.cornell.edu)
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package proguard.classfile.constant;

import proguard.classfile.*;
import proguard.classfile.constant.visitor.ConstantVisitor;

import java.io.UnsupportedEncodingException;

/**
 * This Constant represents a UTF-8 constant in the constant pool.
 * <p>
 * The data are held either as a modified UTF-8 byte array or as a String,
 * never both at the same time. Asking for one representation converts the
 * data and drops the other one.
 *
 * @author Eric Lafortune
 */
public class Utf8Constant extends Constant
{
    // Characters below this limit (except 0) are encoded in a single byte.
    private static final char TWO_BYTE_LIMIT     = 0x80;
    private static final int  TWO_BYTE_CONSTANT1 = 0xc0;
    private static final int  TWO_BYTE_CONSTANT2 = 0x80;
    private static final int  TWO_BYTE_SHIFT1    = 6;
    private static final int  TWO_BYTE_MASK1     = 0x1f;
    private static final int  TWO_BYTE_MASK2     = 0x3f;

    // Characters below this limit are encoded in at most two bytes.
    private static final char THREE_BYTE_LIMIT     = 0x800;
    private static final int  THREE_BYTE_CONSTANT1 = 0xe0;
    private static final int  THREE_BYTE_CONSTANT2 = 0x80;
    private static final int  THREE_BYTE_CONSTANT3 = 0x80;
    private static final int  THREE_BYTE_SHIFT1    = 12;
    private static final int  THREE_BYTE_SHIFT2    = 6;
    private static final int  THREE_BYTE_MASK1     = 0x0f;
    private static final int  THREE_BYTE_MASK2     = 0x3f;
    private static final int  THREE_BYTE_MASK3     = 0x3f;


    // There are a lot of Utf8Constant objects, so we're optimising their storage.
    // Initially, we're storing the UTF-8 bytes in a byte array.
    // When the corresponding String is requested, we ditch the array and just
    // store the String.

    //private int u2length;
    private byte[] bytes;

    private String string;


    /**
     * Creates an uninitialized Utf8Constant.
     * <p>
     * Neither representation is set; the data have to be provided through
     * {@link #setBytes(byte[])} or {@link #setString(String)} before they
     * are retrieved.
     */
    public Utf8Constant()
    {
    }


    /**
     * Creates a Utf8Constant containing the given string.
     *
     * @param string the string value of the constant.
     */
    public Utf8Constant(String string)
    {
        this.bytes  = null;
        this.string = string;
    }


    /**
     * Initializes the UTF-8 data with an array of bytes.
     * Any previously stored String representation is discarded.
     *
     * @param bytes the modified UTF-8 encoded data. The array is stored
     *              as is, without copying.
     */
    public void setBytes(byte[] bytes)
    {
        this.bytes  = bytes;
        this.string = null;
    }


    /**
     * Returns the UTF-8 data as an array of bytes.
     * If the data are currently stored as a String, they are converted and
     * the String is dropped.
     *
     * @return the modified UTF-8 encoded data (the internal array, not a
     *         copy).
     * @throws RuntimeException if the conversion reports an
     *                          UnsupportedEncodingException; the original
     *                          exception is attached as the cause.
     */
    public byte[] getBytes()
    {
        try
        {
            switchToByteArrayRepresentation();
        }
        catch (UnsupportedEncodingException ex)
        {
            // Keep the original exception as the cause, so its stack trace
            // isn't lost.
            throw new RuntimeException(ex.getMessage(), ex);
        }

        return bytes;
    }


    /**
     * Initializes the UTF-8 data with a String.
     * Any previously stored byte array representation is discarded.
     *
     * @param utf8String the string value of the constant.
     */
    public void setString(String utf8String)
    {
        this.bytes  = null;
        this.string = utf8String;
    }


    /**
     * Returns the UTF-8 data as a String.
     * If the data are currently stored as a byte array, they are decoded and
     * the byte array is dropped.
     *
     * @return the string value of the constant.
     * @throws RuntimeException if the stored bytes end in the middle of a
     *                          multi-byte character; the original
     *                          UnsupportedEncodingException is attached as
     *                          the cause.
     */
    public String getString()
    {
        try
        {
            switchToStringRepresentation();
        }
        catch (UnsupportedEncodingException ex)
        {
            // Keep the original exception as the cause, so its stack trace
            // isn't lost.
            throw new RuntimeException(ex.getMessage(), ex);
        }

        return string;
    }


    // Implementations for Constant.

    public int getTag()
    {
        return ClassConstants.CONSTANT_Utf8;
    }

    public void accept(Clazz clazz, ConstantVisitor constantVisitor)
    {
        constantVisitor.visitUtf8Constant(clazz, this);
    }


    // Small utility methods.

    /**
     * Switches to a byte array representation of the UTF-8 data.
     * Does nothing if the byte array is already present.
     */
    private void switchToByteArrayRepresentation() throws UnsupportedEncodingException
    {
        if (bytes == null)
        {
            bytes  = getByteArrayRepresentation(string);
            string = null;
        }
    }


    /**
     * Switches to a String representation of the UTF-8 data.
     * Does nothing if the String is already present. If decoding fails, the
     * byte array is left in place.
     */
    private void switchToStringRepresentation() throws UnsupportedEncodingException
    {
        if (string == null)
        {
            string = getStringRepresentation(bytes);
            bytes  = null;
        }
    }


    /**
     * Returns the modified UTF-8 byte array representation of the given string.
     *
     * @param string the string to be encoded.
     * @return a newly allocated array containing the encoded characters.
     */
    private byte[] getByteArrayRepresentation(String string) throws UnsupportedEncodingException
    {
        // We're computing the byte array ourselves, because the implementation
        // of String.getBytes("UTF-8") has a bug, at least up to JRE 1.4.2.
        // Also note the special treatment of the 0 character.

        // Compute the byte array length.
        int byteLength   = 0;
        int stringLength = string.length();
        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
        {
            char c = string.charAt(stringIndex);

            // The character is represented by one, two, or three bytes.
            byteLength += c == 0                ? 2 :
                          c <  TWO_BYTE_LIMIT   ? 1 :
                          c <  THREE_BYTE_LIMIT ? 2 :
                                                  3;
        }

        // Allocate the byte array with the computed length.
        byte[] bytes = new byte[byteLength];

        // Fill out the array.
        int byteIndex = 0;
        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
        {
            char c = string.charAt(stringIndex);
            if (c == 0)
            {
                // The 0 character gets a two-byte representation in classes.
                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT1;
                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT2;
            }
            else if (c < TWO_BYTE_LIMIT)
            {
                // The character is represented by a single byte.
                bytes[byteIndex++] = (byte)c;
            }
            else if (c < THREE_BYTE_LIMIT)
            {
                // The character is represented by two bytes.
                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT1 | ((c >>> TWO_BYTE_SHIFT1) & TWO_BYTE_MASK1));
                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT2 | ( c                      & TWO_BYTE_MASK2));
            }
            else
            {
                // The character is represented by three bytes.
                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT1 | ((c >>> THREE_BYTE_SHIFT1) & THREE_BYTE_MASK1));
                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT2 | ((c >>> THREE_BYTE_SHIFT2) & THREE_BYTE_MASK2));
                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT3 | ( c                        & THREE_BYTE_MASK3));
            }
        }

        return bytes;
    }


    /**
     * Returns the String representation of the given modified UTF-8 byte array.
     *
     * @param bytes the modified UTF-8 encoded data.
     * @return the decoded string.
     * @throws UnsupportedEncodingException if the array ends before all bytes
     *                                      of a multi-byte character have
     *                                      been read.
     */
    private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException
    {
        // We're computing the string ourselves, because the implementation
        // of "new String(bytes)" doesn't honor the special treatment of
        // the 0 character in JRE 1.6_u11.

        // Allocate the char array. Every character takes up at least one
        // byte, so the number of bytes is an upper bound for its length.
        char[] chars = new char[bytes.length];

        // Fill out the array.
        int charIndex = 0;
        int byteIndex = 0;
        while (byteIndex < bytes.length)
        {
            int b = bytes[byteIndex++] & 0xff;

            // Depending on the flag bits in the first byte, the character
            // is represented by a single byte, by two bytes, or by three
            // bytes. We're not checking the redundant flag bits in the
            // second byte and the third byte.
            int trailingByteCount = b < TWO_BYTE_CONSTANT1   ? 0 :
                                    b < THREE_BYTE_CONSTANT1 ? 1 :
                                                               2;

            // Check explicitly that the trailing bytes are available, before
            // touching the output index. That way, the message only contains
            // the characters that have actually been decoded.
            if (bytes.length - byteIndex < trailingByteCount)
            {
                throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]");
            }

            int c;
            if (trailingByteCount == 0)
            {
                c = b;
            }
            else if (trailingByteCount == 1)
            {
                c = ((b                  & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) |
                    ((bytes[byteIndex++] & TWO_BYTE_MASK2)                   );
            }
            else
            {
                c = ((b                  & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) |
                    ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) |
                    ((bytes[byteIndex++] & THREE_BYTE_MASK3)                     );
            }

            chars[charIndex++] = (char)c;
        }

        return new String(chars, 0, charIndex);
    }
}