12bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson/* 22bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Copyright (C) 2011 The Android Open Source Project 32bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * 42bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Licensed under the Apache License, Version 2.0 (the "License"); 52bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * you may not use this file except in compliance with the License. 62bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * You may obtain a copy of the License at 72bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * 82bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * http://www.apache.org/licenses/LICENSE-2.0 92bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * 102bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Unless required by applicable law or agreed to in writing, software 112bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * distributed under the License is distributed on an "AS IS" BASIS, 122bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * See the License for the specific language governing permissions and 142bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * limitations under the License. 152bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson */ 162bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson 172bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilsonpackage com.android.dex; 182bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson 192bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilsonimport com.android.dex.util.ByteInput; 202bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilsonimport java.io.UTFDataFormatException; 212bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson 222bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson/** 232bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Modified UTF-8 as described in the dex file format spec. 242bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * 252bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * <p>Derived from libcore's MUTF-8 encoder at java.nio.charset.ModifiedUtf8. 262bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson */ 272bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilsonpublic final class Mutf8 { 282bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson private Mutf8() {} 292bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson 302bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson /** 312bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Decodes bytes from {@code in} into {@code out} until a delimiter 0x00 is 322bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * encountered. Returns a new string containing the decoded characters. 332bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson */ 342bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson public static String decode(ByteInput in, char[] out) throws UTFDataFormatException { 352bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson int s = 0; 362bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson while (true) { 372bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson char a = (char) (in.readByte() & 0xff); 382bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson if (a == 0) { 392bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson return new String(out, 0, s); 402bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 412bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson out[s] = a; 422bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson if (a < '\u0080') { 432bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson s++; 442bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } else if ((a & 0xe0) == 0xc0) { 452bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson int b = in.readByte() & 0xff; 462bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson if ((b & 0xC0) != 0x80) { 472bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson throw new UTFDataFormatException("bad second byte"); 482bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 492bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F)); 502bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } else if ((a & 0xf0) == 0xe0) { 512bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson int b = in.readByte() & 0xff; 522bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson int c = in.readByte() & 0xff; 532bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) { 542bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson throw new UTFDataFormatException("bad second or third byte"); 552bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 562bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F)); 572bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } else { 582bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson throw new UTFDataFormatException("bad byte"); 592bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 602bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 612bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 622bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson 632bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson /** 642bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Returns the number of bytes the modified UTF8 representation of 's' would take. 652bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson */ 662bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException { 672bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson long result = 0; 682bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson final int length = s.length(); 692bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson for (int i = 0; i < length; ++i) { 702bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson char ch = s.charAt(i); 712bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson if (ch != 0 && ch <= 127) { // U+0000 uses two bytes. 722bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson ++result; 732bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } else if (ch <= 2047) { 742bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson result += 2; 752bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } else { 762bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson result += 3; 772bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 782bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson if (shortLength && result > 65535) { 792bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson throw new UTFDataFormatException("String more than 65535 UTF bytes long"); 802bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 812bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 822bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson return result; 832bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 842bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson 852bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson /** 862bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Encodes the modified UTF-8 bytes corresponding to {@code s} into {@code 872bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * dst}, starting at {@code offset}. 882bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson */ 892bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson public static void encode(byte[] dst, int offset, String s) { 902bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson final int length = s.length(); 912bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson for (int i = 0; i < length; i++) { 922bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson char ch = s.charAt(i); 932bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson if (ch != 0 && ch <= 127) { // U+0000 uses two bytes. 942bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson dst[offset++] = (byte) ch; 952bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } else if (ch <= 2047) { 962bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson dst[offset++] = (byte) (0xc0 | (0x1f & (ch >> 6))); 972bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson dst[offset++] = (byte) (0x80 | (0x3f & ch)); 982bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } else { 992bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson dst[offset++] = (byte) (0xe0 | (0x0f & (ch >> 12))); 1002bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson dst[offset++] = (byte) (0x80 | (0x3f & (ch >> 6))); 1012bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson dst[offset++] = (byte) (0x80 | (0x3f & ch)); 1022bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 1032bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 1042bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 1052bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson 1062bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson /** 1072bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Returns an array containing the <i>modified UTF-8</i> form of {@code s}. 1082bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson */ 1092bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson public static byte[] encode(String s) throws UTFDataFormatException { 1102bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson int utfCount = (int) countBytes(s, true); 1112bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson byte[] result = new byte[utfCount]; 1122bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson encode(result, 0, s); 1132bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson return result; 1142bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson } 1152bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson} 116