1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.dex;
18
19import com.android.dex.util.ByteInput;
20import java.io.UTFDataFormatException;
21
22/**
23 * Modified UTF-8 as described in the dex file format spec.
24 *
25 * <p>Derived from libcore's MUTF-8 encoder at java.nio.charset.ModifiedUtf8.
26 */
27public final class Mutf8 {
28    private Mutf8() {}
29
30    /**
31     * Decodes bytes from {@code in} into {@code out} until a delimiter 0x00 is
32     * encountered. Returns a new string containing the decoded characters.
33     */
34    public static String decode(ByteInput in, char[] out) throws UTFDataFormatException {
35        int s = 0;
36        while (true) {
37            char a = (char) (in.readByte() & 0xff);
38            if (a == 0) {
39                return new String(out, 0, s);
40            }
41            out[s] = a;
42            if (a < '\u0080') {
43                s++;
44            } else if ((a & 0xe0) == 0xc0) {
45                int b = in.readByte() & 0xff;
46                if ((b & 0xC0) != 0x80) {
47                    throw new UTFDataFormatException("bad second byte");
48                }
49                out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
50            } else if ((a & 0xf0) == 0xe0) {
51                int b = in.readByte() & 0xff;
52                int c = in.readByte() & 0xff;
53                if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
54                    throw new UTFDataFormatException("bad second or third byte");
55                }
56                out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
57            } else {
58                throw new UTFDataFormatException("bad byte");
59            }
60        }
61    }
62
63    /**
64     * Returns the number of bytes the modified UTF8 representation of 's' would take.
65     */
66    private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
67        long result = 0;
68        final int length = s.length();
69        for (int i = 0; i < length; ++i) {
70            char ch = s.charAt(i);
71            if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
72                ++result;
73            } else if (ch <= 2047) {
74                result += 2;
75            } else {
76                result += 3;
77            }
78            if (shortLength && result > 65535) {
79                throw new UTFDataFormatException("String more than 65535 UTF bytes long");
80            }
81        }
82        return result;
83    }
84
85    /**
86     * Encodes the modified UTF-8 bytes corresponding to {@code s} into  {@code
87     * dst}, starting at {@code offset}.
88     */
89    public static void encode(byte[] dst, int offset, String s) {
90        final int length = s.length();
91        for (int i = 0; i < length; i++) {
92            char ch = s.charAt(i);
93            if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
94                dst[offset++] = (byte) ch;
95            } else if (ch <= 2047) {
96                dst[offset++] = (byte) (0xc0 | (0x1f & (ch >> 6)));
97                dst[offset++] = (byte) (0x80 | (0x3f & ch));
98            } else {
99                dst[offset++] = (byte) (0xe0 | (0x0f & (ch >> 12)));
100                dst[offset++] = (byte) (0x80 | (0x3f & (ch >> 6)));
101                dst[offset++] = (byte) (0x80 | (0x3f & ch));
102            }
103        }
104    }
105
106    /**
107     * Returns an array containing the <i>modified UTF-8</i> form of {@code s}.
108     */
109    public static byte[] encode(String s) throws UTFDataFormatException {
110        int utfCount = (int) countBytes(s, true);
111        byte[] result = new byte[utfCount];
112        encode(result, 0, s);
113        return result;
114    }
115}
116