1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package libcore.java.nio.charset;
18
19import junit.framework.TestCase;
20
21import java.io.UTFDataFormatException;
22import java.nio.ByteBuffer;
23import java.nio.ByteOrder;
24import java.nio.charset.ModifiedUtf8;
25import java.util.Arrays;
26
27/**
28 * Tests for {@code ModifiedUtf8}.
29 */
30public class ModifiedUtf8Test extends TestCase {
31    public void test_decode_singleChar() throws Exception {
32        assertEquals("A", ModifiedUtf8.decode(new byte[] { 'A' }, new char[1], 0, 1));
33    }
34
35    public void test_decode_checkOffsetAndLength() throws Exception {
36        assertEquals("BC", ModifiedUtf8.decode(
37                new byte[] { 'A', 'B', 'C', 'D' }, new char[2], 1, 2));
38    }
39
40    public void test_decode_unexpectedEndOfStreamAfterC2_throws() {
41        // We need at least one byte after 0xc2.
42        try {
43            ModifiedUtf8.decode(new byte[]{'B', (byte) 0xc2}, new char[2], 0, 2);
44            fail("Should throw " + UTFDataFormatException.class.getName());
45        } catch(UTFDataFormatException expected) {
46            // Expected.
47        }
48    }
49
50    public void test_decode_unexpectedEndOfStreamAfterE0_throws() {
51        // We need at least two bytes after 0xe0.
52        try {
53            ModifiedUtf8.decode(
54                    new byte[] { 'B', (byte) 0xe0, (byte) 0xab }, new char[2], 0, 3);
55            fail("Should throw " + UTFDataFormatException.class.getName());
56        } catch(UTFDataFormatException expected) {
57            // Expected.
58        }
59    }
60
61    public void test_decode_endOfStreamAfterC2() throws Exception {
62        assertEquals("B\u00a0", ModifiedUtf8.decode(
63                new byte[] { 'B', (byte) 0xc2, (byte) 0xa0 },
64                new char[2],
65                0,
66                3));
67    }
68
69    public void test_decode_endOfStreamAfterE0() throws Exception {
70        assertEquals("B\u0830", ModifiedUtf8.decode(
71                new byte[] { 'B', (byte) 0xe0, (byte) 0xa0, (byte) 0xb0 },
72                new char[2],
73                0,
74                4));
75    }
76
77    public void test_decode_invalidByte_characterUnknown() throws Exception {
78        try {
79            ModifiedUtf8.decode(new byte[]{'A', (byte) 0xf0}, new char[2], 0, 2);
80            fail("Should throw " + UTFDataFormatException.class.getName());
81        } catch (UTFDataFormatException expected) {
82            // Expected.
83        }
84    }
85
86    public void test_decode_someC2Character() throws Exception {
87        assertEquals("A\u00a6", ModifiedUtf8.decode(
88                new byte[] { 'A', (byte) 0xc2, (byte) 0xa6 }, new char[2], 0, 3));
89    }
90
91    public void test_decode_lastC2Character() throws Exception {
92        assertEquals("A\u00bf", ModifiedUtf8.decode(
93                new byte[] { 'A', (byte) 0xc2, (byte) 0xbf }, new char[2], 0, 3));
94    }
95
96    public void test_decode_someTwoByteCharacter() throws Exception {
97        // Make sure bit masking works
98        assertEquals("A\u0606", ModifiedUtf8.decode(
99                new byte[] { 'A', (byte) 0xd8, (byte) 0x86 }, new char[3], 0, 3));
100    }
101
102    public void test_decode_lastTwoByteCharacter() throws Exception {
103        assertEquals("A\u07ff", ModifiedUtf8.decode(
104                new byte[] { 'A', (byte) 0xdf, (byte) 0xbf }, new char[2], 0, 3));
105    }
106
107    public void test_decode_firstE0Character() throws Exception {
108        assertEquals("A\u0800", ModifiedUtf8.decode(
109                new byte[] { 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0x80 },
110                new char[2],
111                0,
112                4));
113    }
114
115    public void test_decode_someThreeBytesCharacter() throws Exception {
116        assertEquals("A\u31c6", ModifiedUtf8.decode(
117                new byte[]{ 'A', (byte) 0xe3, (byte) 0x87, (byte) 0x86 },
118                new char[2],
119                0,
120                4));
121    }
122
123    public void test_decode_lastThreeBytesCharacter() throws Exception {
124        assertEquals("A\uffff", ModifiedUtf8.decode(
125                new byte[] { 'A', (byte) 0xef, (byte) 0xbf, (byte) 0xbf },
126                new char[2],
127                0,
128                4));
129    }
130
131    public void test_decode_twoByteCharacterAfterThreeByteCharacter() throws Exception {
132        assertEquals("\uffff\u0606A", ModifiedUtf8.decode(
133                new byte[] { (byte) 0xef, (byte) 0xbf, (byte) 0xbf, (byte) 0xd8, (byte) 0x86, 'A' },
134                new char[3],
135                0,
136                6));
137    }
138
139    public void test_decode_c080isZero() throws Exception {
140        assertEquals("A\u0000A", ModifiedUtf8.decode(
141                new byte[] { 'A', (byte) 0xc0, (byte) 0x80, 'A' }, new char[3], 0, 4));
142    }
143
144    public void test_decode_00isZero() throws Exception {
145        assertEquals("A\u0000A", ModifiedUtf8.decode(
146                new byte[] { 'A', (byte) 0, 'A' }, new char[3], 0, 3));
147    }
148
149    public void test_decode_insufficientOutputSpace_throws() throws Exception{
150        try {
151            ModifiedUtf8.decode(new byte[] { 'A', (byte) 0, 'A' }, new char[2], 0,  3);
152            fail("Should throw " + ArrayIndexOutOfBoundsException.class.getName());
153        } catch(ArrayIndexOutOfBoundsException expected) {
154            // Expected.
155        }
156    }
157
158    public void test_decode_checkBadSecondByteOfTwo() throws Exception {
159        try {
160            ModifiedUtf8.decode(new byte[]{(byte) 0xc0, (byte) 0xc0}, new char[2], 0, 2);
161            fail("Should throw " + UTFDataFormatException.class.getName());
162        } catch (UTFDataFormatException expected) {
163            // Expected.
164        }
165    }
166
167    public void test_decode_checkBadSecondByteOfThree() throws Exception{
168        try {
169            ModifiedUtf8.decode(new byte[]{
170                    (byte) 0xe0, (byte) 0xc0, (byte) 0x80}, new char[2], 0, 2);
171            fail("Should throw " + UTFDataFormatException.class.getName());
172        } catch (UTFDataFormatException expected) {
173            // Expected.
174        }
175    }
176
177    public void test_decode_checkBadThirdByteOfThree() throws Exception{
178        try {
179            ModifiedUtf8.decode(new byte[]{
180                    (byte) 0xe0, (byte) 0x80, (byte) 0xc0}, new char[2], 0, 2);
181            fail("Should throw " + UTFDataFormatException.class.getName());
182        } catch (UTFDataFormatException expected) {
183            // Expected.
184        }
185    }
186
187    public void test_decode_insufficientInput_throws() throws Exception{
188        try {
189            ModifiedUtf8.decode(new byte[] { 'A', (byte) 0, 'A' }, new char[8], 0,  100);
190            fail("Should throw " + ArrayIndexOutOfBoundsException.class.getName());
191        } catch(ArrayIndexOutOfBoundsException expected) {
192            // Expected.
193        }
194    }
195
196    public void test_decode_extraCharsInArray_ignored() throws Exception {
197        assertEquals("A", ModifiedUtf8.decode(new byte[] { 'A' }, new char[] { 'B', 'Z' }, 0,  1));
198    }
199
200    public void test_countBytes_rightCount() throws Exception {
201        assertEquals(0, ModifiedUtf8.countBytes("", false));
202        assertEquals(2, ModifiedUtf8.countBytes("\u0000", false));
203        assertEquals(1, ModifiedUtf8.countBytes("A", false));
204        assertEquals(1, ModifiedUtf8.countBytes("\u007f", false));
205        assertEquals(2, ModifiedUtf8.countBytes("\u0080", false));
206        assertEquals(2, ModifiedUtf8.countBytes("\u07ff", false));
207        assertEquals(3, ModifiedUtf8.countBytes("\u0800", false));
208        assertEquals(3, ModifiedUtf8.countBytes("\uffff", false));
209    }
210
211    public void test_countBytes_checkExceptionThrown() throws Exception {
212        // These two mustn't throw...
213        ModifiedUtf8.countBytes("", true);
214        ModifiedUtf8.countBytes("A", true);
215
216        char[] unsignedShortSizedCharArray = new char[2 * Short.MAX_VALUE + 1];
217        for (int i = 0; i < unsignedShortSizedCharArray.length; i++) {
218            unsignedShortSizedCharArray[i] = 'A';
219        }
220        String unsignedShortSizedString = String.copyValueOf(unsignedShortSizedCharArray);
221
222        char[] sizeLongerThanUnsignedShortCharArray = new char[2 * Short.MAX_VALUE + 2];
223        for (int i = 0; i < sizeLongerThanUnsignedShortCharArray.length; i++) {
224            sizeLongerThanUnsignedShortCharArray[i] = 'A';
225        }
226        String sizeLongerThanUnsignedShortString = String.copyValueOf(
227                sizeLongerThanUnsignedShortCharArray);
228
229        // Mustn't throw.
230        ModifiedUtf8.countBytes(unsignedShortSizedString, true);
231
232        try {
233            // Must throw.
234            ModifiedUtf8.countBytes(sizeLongerThanUnsignedShortString, true);
235            fail();
236        } catch (UTFDataFormatException expected) {
237            // Expected.
238        }
239
240        // Mustn't throw.
241        ModifiedUtf8.countBytes(unsignedShortSizedString, false);
242        ModifiedUtf8.countBytes(sizeLongerThanUnsignedShortString, false);
243    }
244
245    public void test_encode() throws Exception {
246        assertTrue(Arrays.equals(new byte[]{0, 1, 'A'}, ModifiedUtf8.encode("A")));
247        assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', 'B', 'C' }, ModifiedUtf8.encode("ABC")));
248        assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xa0 },
249                ModifiedUtf8.encode("A\u00a0")));
250        assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0xb0 },
251                ModifiedUtf8.encode("A\u0830")));
252        assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xa6 },
253                ModifiedUtf8.encode("A\u00a6")));
254        assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xbf },
255                ModifiedUtf8.encode("A\u00bf")));
256        assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xd8, (byte) 0x86 },
257                ModifiedUtf8.encode("A\u0606")));
258        assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xdf, (byte) 0xbf },
259                ModifiedUtf8.encode("A\u07ff")));
260        assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0x80 },
261                ModifiedUtf8.encode("A\u0800")));
262        assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe3, (byte) 0x87, (byte) 0x86 },
263                ModifiedUtf8.encode("A\u31c6")));
264        assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xef, (byte) 0xbf, (byte) 0xbf },
265                ModifiedUtf8.encode("A\uffff")));
266        assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc0, (byte) 0x80 },
267                ModifiedUtf8.encode("A\u0000")));
268        assertTrue(
269                Arrays.equals(new byte[] { 0, 8, (byte) 0xe3, (byte) 0x87, (byte) 0x86,
270                                (byte) 0xd8, (byte) 0x86, (byte) 0xc0, (byte) 0x80, 'A' },
271                ModifiedUtf8.encode("\u31c6\u0606\u0000A")));
272    }
273
274    public void test_encode_throws() throws Exception {
275        char[] unsignedShortSizedCharArray = new char[Short.MAX_VALUE * 2 + 1];
276        for (int i = 0; i < unsignedShortSizedCharArray.length; i++) {
277            unsignedShortSizedCharArray[i] = 'A';
278        }
279        String unsignedShortSizedString = String.copyValueOf(unsignedShortSizedCharArray);
280
281        char[] sizeLongerThanUnsignedShortCharArray = new char[Short.MAX_VALUE * 2 + 2];
282        for (int i = 0; i < sizeLongerThanUnsignedShortCharArray.length; i++) {
283            sizeLongerThanUnsignedShortCharArray[i] = 'A';
284        }
285        String sizeLongerThanUnsignedShortString =
286                String.copyValueOf(sizeLongerThanUnsignedShortCharArray);
287
288        // Mustn't throw.
289        ModifiedUtf8.encode(unsignedShortSizedString);
290        try {
291            // Must throw.
292            ModifiedUtf8.encode(sizeLongerThanUnsignedShortString);
293            fail("Should throw " + UTFDataFormatException.class.getName());
294        } catch (UTFDataFormatException expected) {
295            // Expected.
296        }
297    }
298
299    public void test_encode_lengthAtBeginning() throws Exception {
300        int testStringLength = 20000;
301        char[] charArray = new char[testStringLength];
302        for (int i = 0; i < charArray.length; i++) {
303            charArray[i] = 'A';
304        }
305        String testString = String.copyValueOf(charArray);
306
307        // Mustn't throw.
308        byte[] result = ModifiedUtf8.encode(testString);
309        ByteBuffer b = ByteBuffer.wrap(result);
310        b.order(ByteOrder.BIG_ENDIAN);
311        assertEquals(testStringLength, b.getShort());
312    }
313
314}
315