1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package libcore.java.nio.charset; 18 19import java.nio.ByteBuffer; 20import java.nio.CharBuffer; 21import java.nio.charset.Charset; 22import java.nio.charset.CharsetEncoder; 23import java.nio.charset.CoderResult; 24import java.nio.charset.CodingErrorAction; 25import java.util.Arrays; 26 27public class CharsetEncoderTest extends junit.framework.TestCase { 28 // None of the harmony or jtreg tests actually check that replaceWith does the right thing! 29 public void test_replaceWith() throws Exception { 30 Charset ascii = Charset.forName("US-ASCII"); 31 CharsetEncoder e = ascii.newEncoder(); 32 e.onMalformedInput(CodingErrorAction.REPLACE); 33 e.onUnmappableCharacter(CodingErrorAction.REPLACE); 34 e.replaceWith("=".getBytes("US-ASCII")); 35 String input = "hello\u0666world"; 36 String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString(); 37 assertEquals("hello=world", output); 38 } 39 40 private void assertReplacementBytesForEncoder(String charset, byte[] bytes) { 41 byte[] result = Charset.forName(charset).newEncoder().replacement(); 42 assertEquals(Arrays.toString(bytes), Arrays.toString(result)); 43 } 44 45 // For all the guaranteed built-in charsets, check that we have the right default replacements. 46 public void test_defaultReplacementBytesIso_8859_1() throws Exception { 47 assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' }); 48 } 49 public void test_defaultReplacementBytesUs_Ascii() throws Exception { 50 assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' }); 51 } 52 public void test_defaultReplacementBytesUtf_16() throws Exception { 53 assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd }); 54 } 55 public void test_defaultReplacementBytesUtf_16be() throws Exception { 56 assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd }); 57 } 58 public void test_defaultReplacementBytesUtf_16le() throws Exception { 59 assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff }); 60 } 61 public void test_defaultReplacementBytesUtf_8() throws Exception { 62 assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' }); 63 } 64 65 public void testSurrogatePairAllAtOnce() throws Exception { 66 // okay: surrogate pair seen all at once is decoded to U+20b9f. 67 Charset cs = Charset.forName("UTF-32BE"); 68 CharsetEncoder e = cs.newEncoder(); 69 ByteBuffer bb = ByteBuffer.allocate(128); 70 CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false); 71 assertEquals(CoderResult.UNDERFLOW, cr); 72 assertEquals(4, bb.position()); 73 assertEquals((byte) 0x00, bb.get(0)); 74 assertEquals((byte) 0x02, bb.get(1)); 75 assertEquals((byte) 0x0b, bb.get(2)); 76 assertEquals((byte) 0x9f, bb.get(3)); 77 } 78 79 public void testMalformedSurrogatePair() throws Exception { 80 // malformed: low surrogate first is detected as an error. 81 Charset cs = Charset.forName("UTF-32BE"); 82 CharsetEncoder e = cs.newEncoder(); 83 ByteBuffer bb = ByteBuffer.allocate(128); 84 CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false); 85 assertTrue(cr.toString(), cr.isMalformed()); 86 assertEquals(1, cr.length()); 87 } 88 89 public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE_RI() throws Exception { 90 testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.IGNORE); 91 } 92 93 public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT_RI() throws Exception { 94 testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPORT); 95 } 96 97 public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE_RI() throws Exception { 98 testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPLACE); 99 } 100 101 private void testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction cea) throws Exception { 102 // stupid: on the RI, writing the two halves of the surrogate pair in separate writes 103 // is an error because the CharsetEncoder doesn't remember it's half-way through a 104 // surrogate pair across the two calls! 105 106 // IGNORE just ignores both characters, REPORT complains that the second is 107 // invalid (because it doesn't remember seeing the first), and REPLACE inserts a 108 // replacement character U+fffd when it sees the second character (because it too 109 // doesn't remember seeing the first). 110 111 Charset cs = Charset.forName("UTF-32BE"); 112 CharsetEncoder e = cs.newEncoder(); 113 e.onMalformedInput(cea); 114 e.onUnmappableCharacter(cea); 115 ByteBuffer bb = ByteBuffer.allocate(128); 116 CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false); 117 assertEquals(CoderResult.UNDERFLOW, cr); 118 assertEquals(0, bb.position()); 119 cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false); 120 if (cea == CodingErrorAction.REPORT) { 121 assertTrue(cr.toString(), cr.isMalformed()); 122 assertEquals(1, cr.length()); 123 return; 124 } 125 assertEquals(CoderResult.UNDERFLOW, cr); 126 int expectedPosition = 0; 127 if (cea == CodingErrorAction.REPLACE) { 128 expectedPosition = 4; 129 assertEquals(expectedPosition, bb.position()); 130 System.err.println(Arrays.toString(Arrays.copyOfRange(bb.array(), 0, bb.position()))); 131 assertEquals((byte) 0x00, bb.get(0)); 132 assertEquals((byte) 0x00, bb.get(1)); 133 assertEquals((byte) 0xff, bb.get(2)); 134 assertEquals((byte) 0xfd, bb.get(3)); 135 } 136 assertEquals(expectedPosition, bb.position()); 137 cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true); 138 assertEquals(CoderResult.UNDERFLOW, cr); 139 assertEquals(expectedPosition, bb.position()); 140 cr = e.flush(bb); 141 assertEquals(CoderResult.UNDERFLOW, cr); 142 assertEquals(expectedPosition, bb.position()); 143 } 144 145 public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE() throws Exception { 146 testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.IGNORE); 147 } 148 149 public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT() throws Exception { 150 testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPORT); 151 } 152 153 public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE() throws Exception { 154 testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPLACE); 155 } 156 157 private void testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction cea) throws Exception { 158 // Writing the two halves of the surrogate pair in separate writes works just fine. 159 // This is true of Android and ICU, but not of the RI. 160 Charset cs = Charset.forName("UTF-32BE"); 161 CharsetEncoder e = cs.newEncoder(); 162 e.onMalformedInput(cea); 163 e.onUnmappableCharacter(cea); 164 ByteBuffer bb = ByteBuffer.allocate(128); 165 CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false); 166 assertEquals(CoderResult.UNDERFLOW, cr); 167 assertEquals(0, bb.position()); 168 cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false); 169 assertEquals(CoderResult.UNDERFLOW, cr); 170 int expectedPosition = 4; 171 assertEquals(expectedPosition, bb.position()); 172 System.err.println(Arrays.toString(Arrays.copyOfRange(bb.array(), 0, bb.position()))); 173 assertEquals((byte) 0x00, bb.get(0)); 174 assertEquals((byte) 0x02, bb.get(1)); 175 assertEquals((byte) 0x0b, bb.get(2)); 176 assertEquals((byte) 0x9f, bb.get(3)); 177 cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true); 178 assertEquals(CoderResult.UNDERFLOW, cr); 179 assertEquals(expectedPosition, bb.position()); 180 cr = e.flush(bb); 181 assertEquals(CoderResult.UNDERFLOW, cr); 182 assertEquals(expectedPosition, bb.position()); 183 } 184 185 public void testFlushWithoutEndOfInput() throws Exception { 186 Charset cs = Charset.forName("UTF-32BE"); 187 CharsetEncoder e = cs.newEncoder(); 188 ByteBuffer bb = ByteBuffer.allocate(128); 189 CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false); 190 assertEquals(CoderResult.UNDERFLOW, cr); 191 assertEquals(4, bb.position()); 192 try { 193 cr = e.flush(bb); 194 } catch (IllegalStateException expected) { 195 // you must call encode with endOfInput true before you can flush. 196 } 197 198 // We had a bug where we wouldn't reset inEnd before calling encode in implFlush. 199 // That would result in flush outputting garbage. 200 cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true); 201 assertEquals(CoderResult.UNDERFLOW, cr); 202 assertEquals(8, bb.position()); 203 cr = e.flush(bb); 204 assertEquals(CoderResult.UNDERFLOW, cr); 205 assertEquals(8, bb.position()); 206 } 207} 208