1/* 2 * ProGuard -- shrinking, optimization, obfuscation, and preverification 3 * of Java bytecode. 4 * 5 * Copyright (c) 2002-2009 Eric Lafortune (eric@graphics.cornell.edu) 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License as published by the Free 9 * Software Foundation; either version 2 of the License, or (at your option) 10 * any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 * more details. 16 * 17 * You should have received a copy of the GNU General Public License along 18 * with this program; if not, write to the Free Software Foundation, Inc., 19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21package proguard.classfile.constant; 22 23import proguard.classfile.*; 24import proguard.classfile.constant.visitor.ConstantVisitor; 25 26import java.io.UnsupportedEncodingException; 27 28/** 29 * This Constant represents a UTF-8 constant in the constant pool. 30 * 31 * @author Eric Lafortune 32 */ 33public class Utf8Constant extends Constant 34{ 35 private static final char TWO_BYTE_LIMIT = 0x80; 36 private static final int TWO_BYTE_CONSTANT1 = 0xc0; 37 private static final int TWO_BYTE_CONSTANT2 = 0x80; 38 private static final int TWO_BYTE_SHIFT1 = 6; 39 private static final int TWO_BYTE_MASK1 = 0x1f; 40 private static final int TWO_BYTE_MASK2 = 0x3f; 41 42 private static final char THREE_BYTE_LIMIT = 0x800; 43 private static final int THREE_BYTE_CONSTANT1 = 0xe0; 44 private static final int THREE_BYTE_CONSTANT2 = 0x80; 45 private static final int THREE_BYTE_CONSTANT3 = 0x80; 46 private static final int THREE_BYTE_SHIFT1 = 12; 47 private static final int THREE_BYTE_SHIFT2 = 6; 48 private static final int THREE_BYTE_MASK1 = 0x0f; 49 private static final int THREE_BYTE_MASK2 = 0x3f; 50 private static final int THREE_BYTE_MASK3 = 0x3f; 51 52 53 // There are a lot of Utf8Constant objects, so we're optimising their storage. 54 // Initially, we're storing the UTF-8 bytes in a byte array. 55 // When the corresponding String is requested, we ditch the array and just 56 // store the String. 57 58 //private int u2length; 59 private byte[] bytes; 60 61 private String string; 62 63 64 /** 65 * Creates an uninitialized Utf8Constant. 66 * 67 */ 68 public Utf8Constant() 69 { 70 } 71 72 73 /** 74 * Creates a Utf8Constant containing the given string. 75 */ 76 public Utf8Constant(String string) 77 { 78 this.bytes = null; 79 this.string = string; 80 } 81 82 83 /** 84 * Initializes the UTF-8 data with an array of bytes. 85 */ 86 public void setBytes(byte[] bytes) 87 { 88 this.bytes = bytes; 89 this.string = null; 90 } 91 92 93 /** 94 * Returns the UTF-8 data as an array of bytes. 95 */ 96 public byte[] getBytes() 97 { 98 try 99 { 100 switchToByteArrayRepresentation(); 101 } 102 catch (UnsupportedEncodingException ex) 103 { 104 throw new RuntimeException(ex.getMessage()); 105 } 106 107 return bytes; 108 } 109 110 111 /** 112 * Initializes the UTF-8 data with a String. 113 */ 114 public void setString(String utf8String) 115 { 116 this.bytes = null; 117 this.string = utf8String; 118 } 119 120 121 /** 122 * Returns the UTF-8 data as a String. 123 */ 124 public String getString() 125 { 126 try 127 { 128 switchToStringRepresentation(); 129 } 130 catch (UnsupportedEncodingException ex) 131 { 132 throw new RuntimeException(ex.getMessage()); 133 } 134 135 return string; 136 } 137 138 139 // Implementations for Constant. 140 141 public int getTag() 142 { 143 return ClassConstants.CONSTANT_Utf8; 144 } 145 146 public void accept(Clazz clazz, ConstantVisitor constantVisitor) 147 { 148 constantVisitor.visitUtf8Constant(clazz, this); 149 } 150 151 152 // Small utility methods. 153 154 /** 155 * Switches to a byte array representation of the UTF-8 data. 156 */ 157 private void switchToByteArrayRepresentation() throws UnsupportedEncodingException 158 { 159 if (bytes == null) 160 { 161 bytes = getByteArrayRepresentation(string); 162 string = null; 163 } 164 } 165 166 167 /** 168 * Switches to a String representation of the UTF-8 data. 169 */ 170 private void switchToStringRepresentation() throws UnsupportedEncodingException 171 { 172 if (string == null) 173 { 174 string = getStringRepresentation(bytes); 175 bytes = null; 176 } 177 } 178 179 180 /** 181 * Returns the modified UTF-8 byte array representation of the given string. 182 */ 183 private byte[] getByteArrayRepresentation(String string) throws UnsupportedEncodingException 184 { 185 // We're computing the byte array ourselves, because the implementation 186 // of String.getBytes("UTF-8") has a bug, at least up to JRE 1.4.2. 187 // Also note the special treatment of the 0 character. 188 189 // Compute the byte array length. 190 int byteLength = 0; 191 int stringLength = string.length(); 192 for (int stringIndex = 0; stringIndex < stringLength; stringIndex++) 193 { 194 char c = string.charAt(stringIndex); 195 196 // The character is represented by one, two, or three bytes. 197 byteLength += c == 0 ? 2 : 198 c < TWO_BYTE_LIMIT ? 1 : 199 c < THREE_BYTE_LIMIT ? 2 : 200 3; 201 } 202 203 // Allocate the byte array with the computed length. 204 byte[] bytes = new byte[byteLength]; 205 206 // Fill out the array. 207 int byteIndex = 0; 208 for (int stringIndex = 0; stringIndex < stringLength; stringIndex++) 209 { 210 char c = string.charAt(stringIndex); 211 if (c == 0) 212 { 213 // The 0 character gets a two-byte representation in classes. 214 bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT1; 215 bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT2; 216 } 217 else if (c < TWO_BYTE_LIMIT) 218 { 219 // The character is represented by a single byte. 220 bytes[byteIndex++] = (byte)c; 221 } 222 else if (c < THREE_BYTE_LIMIT) 223 { 224 // The character is represented by two bytes. 225 bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT1 | ((c >>> TWO_BYTE_SHIFT1) & TWO_BYTE_MASK1)); 226 bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT2 | ( c & TWO_BYTE_MASK2)); 227 } 228 else 229 { 230 // The character is represented by three bytes. 231 bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT1 | ((c >>> THREE_BYTE_SHIFT1) & THREE_BYTE_MASK1)); 232 bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT2 | ((c >>> THREE_BYTE_SHIFT2) & THREE_BYTE_MASK2)); 233 bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT3 | ( c & THREE_BYTE_MASK3)); 234 } 235 } 236 237 return bytes; 238 } 239 240 241 /** 242 * Returns the String representation of the given modified UTF-8 byte array. 243 */ 244 private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException 245 { 246 // We're computing the string ourselves, because the implementation 247 // of "new String(bytes)" doesn't honor the special treatment of 248 // the 0 character in JRE 1.6_u11. 249 250 // Allocate the byte array with the computed length. 251 char[] chars = new char[bytes.length]; 252 253 // Fill out the array. 254 int charIndex = 0; 255 int byteIndex = 0; 256 while (byteIndex < bytes.length) 257 { 258 259 int b = bytes[byteIndex++] & 0xff; 260 261 // Depending on the flag bits in the first byte, the character 262 // is represented by a single byte, by two bytes, or by three 263 // bytes. We're not checking the redundant flag bits in the 264 // second byte and the third byte. 265 try 266 { 267 chars[charIndex++] = 268 (char)(b < TWO_BYTE_CONSTANT1 ? b : 269 270 b < THREE_BYTE_CONSTANT1 ? ((b & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) | 271 ((bytes[byteIndex++] & TWO_BYTE_MASK2) ) : 272 273 ((b & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) | 274 ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) | 275 ((bytes[byteIndex++] & THREE_BYTE_MASK3) )); 276 } 277 catch (ArrayIndexOutOfBoundsException e) 278 { 279 throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]"); 280 } 281 } 282 283 return new String(chars, 0, charIndex); 284 } 285} 286