/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.dex;

import com.android.dex.util.ByteInput;
import java.io.UTFDataFormatException;

222bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson/**
232bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * Modified UTF-8 as described in the dex file format spec.
242bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson *
252bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson * <p>Derived from libcore's MUTF-8 encoder at java.nio.charset.ModifiedUtf8.
262bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson */
272bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilsonpublic final class Mutf8 {
282bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    private Mutf8() {}
292bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson
302bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    /**
312bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     * Decodes bytes from {@code in} into {@code out} until a delimiter 0x00 is
322bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     * encountered. Returns a new string containing the decoded characters.
332bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     */
342bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    public static String decode(ByteInput in, char[] out) throws UTFDataFormatException {
352bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        int s = 0;
362bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        while (true) {
372bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            char a = (char) (in.readByte() & 0xff);
382bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            if (a == 0) {
392bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                return new String(out, 0, s);
402bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            }
412bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            out[s] = a;
422bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            if (a < '\u0080') {
432bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                s++;
442bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            } else if ((a & 0xe0) == 0xc0) {
452bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                int b = in.readByte() & 0xff;
462bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                if ((b & 0xC0) != 0x80) {
472bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                    throw new UTFDataFormatException("bad second byte");
482bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                }
492bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
502bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            } else if ((a & 0xf0) == 0xe0) {
512bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                int b = in.readByte() & 0xff;
522bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                int c = in.readByte() & 0xff;
532bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
542bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                    throw new UTFDataFormatException("bad second or third byte");
552bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                }
562bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
572bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            } else {
582bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                throw new UTFDataFormatException("bad byte");
592bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            }
602bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        }
612bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    }
622bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson
632bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    /**
642bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     * Returns the number of bytes the modified UTF8 representation of 's' would take.
652bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     */
662bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
672bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        long result = 0;
682bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        final int length = s.length();
692bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        for (int i = 0; i < length; ++i) {
702bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            char ch = s.charAt(i);
712bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
722bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                ++result;
732bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            } else if (ch <= 2047) {
742bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                result += 2;
752bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            } else {
762bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                result += 3;
772bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            }
782bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            if (shortLength && result > 65535) {
792bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                throw new UTFDataFormatException("String more than 65535 UTF bytes long");
802bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            }
812bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        }
822bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        return result;
832bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    }
842bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson
852bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    /**
862bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     * Encodes the modified UTF-8 bytes corresponding to {@code s} into  {@code
872bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     * dst}, starting at {@code offset}.
882bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     */
892bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    public static void encode(byte[] dst, int offset, String s) {
902bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        final int length = s.length();
912bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        for (int i = 0; i < length; i++) {
922bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            char ch = s.charAt(i);
932bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
942bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                dst[offset++] = (byte) ch;
952bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            } else if (ch <= 2047) {
962bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                dst[offset++] = (byte) (0xc0 | (0x1f & (ch >> 6)));
972bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                dst[offset++] = (byte) (0x80 | (0x3f & ch));
982bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            } else {
992bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                dst[offset++] = (byte) (0xe0 | (0x0f & (ch >> 12)));
1002bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                dst[offset++] = (byte) (0x80 | (0x3f & (ch >> 6)));
1012bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson                dst[offset++] = (byte) (0x80 | (0x3f & ch));
1022bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson            }
1032bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        }
1042bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    }
1052bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson
1062bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    /**
1072bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     * Returns an array containing the <i>modified UTF-8</i> form of {@code s}.
1082bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson     */
1092bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    public static byte[] encode(String s) throws UTFDataFormatException {
1102bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        int utfCount = (int) countBytes(s, true);
1112bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        byte[] result = new byte[utfCount];
1122bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        encode(result, 0, s);
1132bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson        return result;
1142bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson    }
1152bea5ee615b0f4add658d5660bd81c5145a0d05eJesse Wilson}
116