1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package libcore.java.nio.charset;
18
19import java.nio.ByteBuffer;
20import java.nio.CharBuffer;
21import java.nio.charset.Charset;
22import java.nio.charset.CharsetEncoder;
23import java.nio.charset.CoderResult;
24import java.nio.charset.CodingErrorAction;
25import java.util.Arrays;
26
27public class CharsetEncoderTest extends junit.framework.TestCase {
28    // None of the harmony or jtreg tests actually check that replaceWith does the right thing!
29    public void test_replaceWith() throws Exception {
30        Charset ascii = Charset.forName("US-ASCII");
31        CharsetEncoder e = ascii.newEncoder();
32        e.onMalformedInput(CodingErrorAction.REPLACE);
33        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
34        e.replaceWith("=".getBytes("US-ASCII"));
35        String input = "hello\u0666world";
36        String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
37        assertEquals("hello=world", output);
38    }
39
40    private void assertReplacementBytesForEncoder(String charset, byte[] bytes) {
41        byte[] result = Charset.forName(charset).newEncoder().replacement();
42        assertEquals(Arrays.toString(bytes), Arrays.toString(result));
43    }
44
45    // For all the guaranteed built-in charsets, check that we have the right default replacements.
46    public void test_defaultReplacementBytesIso_8859_1() throws Exception {
47        assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' });
48    }
49    public void test_defaultReplacementBytesUs_Ascii() throws Exception {
50        assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' });
51    }
52    public void test_defaultReplacementBytesUtf_16() throws Exception {
53        assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
54    }
55    public void test_defaultReplacementBytesUtf_16be() throws Exception {
56        assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd });
57    }
58    public void test_defaultReplacementBytesUtf_16le() throws Exception {
59        assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff });
60    }
61    public void test_defaultReplacementBytesUtf_8() throws Exception {
62        assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' });
63    }
64
65    public void testSurrogatePairAllAtOnce() throws Exception {
66        // okay: surrogate pair seen all at once is decoded to U+20b9f.
67        Charset cs = Charset.forName("UTF-32BE");
68        CharsetEncoder e = cs.newEncoder();
69        ByteBuffer bb = ByteBuffer.allocate(128);
70        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false);
71        assertEquals(CoderResult.UNDERFLOW, cr);
72        assertEquals(4, bb.position());
73        assertEquals((byte) 0x00, bb.get(0));
74        assertEquals((byte) 0x02, bb.get(1));
75        assertEquals((byte) 0x0b, bb.get(2));
76        assertEquals((byte) 0x9f, bb.get(3));
77    }
78
79    public void testMalformedSurrogatePair() throws Exception {
80        // malformed: low surrogate first is detected as an error.
81        Charset cs = Charset.forName("UTF-32BE");
82        CharsetEncoder e = cs.newEncoder();
83        ByteBuffer bb = ByteBuffer.allocate(128);
84        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
85        assertTrue(cr.toString(), cr.isMalformed());
86        assertEquals(1, cr.length());
87    }
88
89    public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE_RI() throws Exception {
90        testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.IGNORE);
91    }
92
93    public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT_RI() throws Exception {
94        testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPORT);
95    }
96
97    public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE_RI() throws Exception {
98        testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPLACE);
99    }
100
101    private void testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction cea) throws Exception {
102        // stupid: on the RI, writing the two halves of the surrogate pair in separate writes
103        // is an error because the CharsetEncoder doesn't remember it's half-way through a
104        // surrogate pair across the two calls!
105
106        // IGNORE just ignores both characters, REPORT complains that the second is
107        // invalid (because it doesn't remember seeing the first), and REPLACE inserts a
108        // replacement character U+fffd when it sees the second character (because it too
109        // doesn't remember seeing the first).
110
111        Charset cs = Charset.forName("UTF-32BE");
112        CharsetEncoder e = cs.newEncoder();
113        e.onMalformedInput(cea);
114        e.onUnmappableCharacter(cea);
115        ByteBuffer bb = ByteBuffer.allocate(128);
116        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
117        assertEquals(CoderResult.UNDERFLOW, cr);
118        assertEquals(0, bb.position());
119        cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
120        if (cea == CodingErrorAction.REPORT) {
121            assertTrue(cr.toString(), cr.isMalformed());
122            assertEquals(1, cr.length());
123            return;
124        }
125        assertEquals(CoderResult.UNDERFLOW, cr);
126        int expectedPosition = 0;
127        if (cea == CodingErrorAction.REPLACE) {
128            expectedPosition = 4;
129            assertEquals(expectedPosition, bb.position());
130            System.err.println(Arrays.toString(Arrays.copyOfRange(bb.array(), 0, bb.position())));
131            assertEquals((byte) 0x00, bb.get(0));
132            assertEquals((byte) 0x00, bb.get(1));
133            assertEquals((byte) 0xff, bb.get(2));
134            assertEquals((byte) 0xfd, bb.get(3));
135        }
136        assertEquals(expectedPosition, bb.position());
137        cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
138        assertEquals(CoderResult.UNDERFLOW, cr);
139        assertEquals(expectedPosition, bb.position());
140        cr = e.flush(bb);
141        assertEquals(CoderResult.UNDERFLOW, cr);
142        assertEquals(expectedPosition, bb.position());
143    }
144
145    public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE() throws Exception {
146        testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.IGNORE);
147    }
148
149    public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT() throws Exception {
150        testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPORT);
151    }
152
153    public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE() throws Exception {
154        testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPLACE);
155    }
156
157    private void testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction cea) throws Exception {
158        // Writing the two halves of the surrogate pair in separate writes works just fine.
159        // This is true of Android and ICU, but not of the RI.
160        Charset cs = Charset.forName("UTF-32BE");
161        CharsetEncoder e = cs.newEncoder();
162        e.onMalformedInput(cea);
163        e.onUnmappableCharacter(cea);
164        ByteBuffer bb = ByteBuffer.allocate(128);
165        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
166        assertEquals(CoderResult.UNDERFLOW, cr);
167        assertEquals(0, bb.position());
168        cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
169        assertEquals(CoderResult.UNDERFLOW, cr);
170        int expectedPosition = 4;
171        assertEquals(expectedPosition, bb.position());
172        System.err.println(Arrays.toString(Arrays.copyOfRange(bb.array(), 0, bb.position())));
173        assertEquals((byte) 0x00, bb.get(0));
174        assertEquals((byte) 0x02, bb.get(1));
175        assertEquals((byte) 0x0b, bb.get(2));
176        assertEquals((byte) 0x9f, bb.get(3));
177        cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
178        assertEquals(CoderResult.UNDERFLOW, cr);
179        assertEquals(expectedPosition, bb.position());
180        cr = e.flush(bb);
181        assertEquals(CoderResult.UNDERFLOW, cr);
182        assertEquals(expectedPosition, bb.position());
183    }
184
185    public void testFlushWithoutEndOfInput() throws Exception {
186        Charset cs = Charset.forName("UTF-32BE");
187        CharsetEncoder e = cs.newEncoder();
188        ByteBuffer bb = ByteBuffer.allocate(128);
189        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false);
190        assertEquals(CoderResult.UNDERFLOW, cr);
191        assertEquals(4, bb.position());
192        try {
193            cr = e.flush(bb);
194        } catch (IllegalStateException expected) {
195            // you must call encode with endOfInput true before you can flush.
196        }
197
198        // We had a bug where we wouldn't reset inEnd before calling encode in implFlush.
199        // That would result in flush outputting garbage.
200        cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true);
201        assertEquals(CoderResult.UNDERFLOW, cr);
202        assertEquals(8, bb.position());
203        cr = e.flush(bb);
204        assertEquals(CoderResult.UNDERFLOW, cr);
205        assertEquals(8, bb.position());
206    }
207}
208