1/** 2******************************************************************************* 3* Copyright (C) 1996-2006, International Business Machines Corporation and * 4* others. All Rights Reserved. * 5******************************************************************************* 6* 7******************************************************************************* 8*/ 9/** 10 * A JNI interface for ICU converters. 11 * 12 * 13 * @author Ram Viswanadha, IBM 14 */ 15package java.nio.charset; 16 17import java.nio.ByteBuffer; 18import java.nio.CharBuffer; 19import java.util.HashMap; 20import java.util.Map; 21import libcore.icu.ICU; 22import libcore.icu.NativeConverter; 23import libcore.util.EmptyArray; 24 25final class CharsetEncoderICU extends CharsetEncoder { 26 private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>(); 27 static { 28 // ICU has different default replacements to the RI in some cases. There are many 29 // additional cases, but this covers all the charsets that Java guarantees will be 30 // available, which is where compatibility seems most important. (The RI even uses 31 // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that 32 // byte corresponds to an entirely different character.) 33 // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it 34 // can represent it, but this is what the RI does... 35 byte[] questionMark = new byte[] { (byte) '?' }; 36 DEFAULT_REPLACEMENTS.put("UTF-8", questionMark); 37 DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark); 38 DEFAULT_REPLACEMENTS.put("US-ASCII", questionMark); 39 } 40 41 private static final int INPUT_OFFSET = 0; 42 private static final int OUTPUT_OFFSET = 1; 43 private static final int INVALID_CHARS = 2; 44 /* 45 * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed 46 * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written 47 * data[INVALID_CHARS] = number of invalid chars 48 */ 49 private int[] data = new int[3]; 50 51 /* handle to the ICU converter that is opened */ 52 private long converterHandle=0; 53 54 private char[] input = null; 55 private byte[] output = null; 56 57 private char[] allocatedInput = null; 58 private byte[] allocatedOutput = null; 59 60 // These instance variables are always assigned in the methods before being used. This class 61 // is inherently thread-unsafe so we don't have to worry about synchronization. 62 private int inEnd; 63 private int outEnd; 64 65 public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) { 66 // This complexity is necessary to ensure that even if the constructor, superclass 67 // constructor, or call to updateCallback throw, we still free the native peer. 68 long address = 0; 69 try { 70 address = NativeConverter.openConverter(icuCanonicalName); 71 float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address); 72 float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address); 73 byte[] replacement = makeReplacement(icuCanonicalName, address); 74 CharsetEncoderICU result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address); 75 address = 0; // CharsetEncoderICU has taken ownership; its finalizer will do the free. 76 return result; 77 } finally { 78 if (address != 0) { 79 NativeConverter.closeConverter(address); 80 } 81 } 82 } 83 84 private static byte[] makeReplacement(String icuCanonicalName, long address) { 85 // We have our own map of RI-compatible default replacements (where ICU disagrees)... 86 byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName); 87 if (replacement != null) { 88 return replacement.clone(); 89 } 90 // ...but fall back to asking ICU. 91 return NativeConverter.getSubstitutionBytes(address); 92 } 93 94 private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) { 95 super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true); 96 // Our native peer needs to know what just happened... 97 this.converterHandle = address; 98 updateCallback(); 99 } 100 101 @Override protected void implReplaceWith(byte[] newReplacement) { 102 updateCallback(); 103 } 104 105 @Override protected void implOnMalformedInput(CodingErrorAction newAction) { 106 updateCallback(); 107 } 108 109 @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) { 110 updateCallback(); 111 } 112 113 private void updateCallback() { 114 NativeConverter.setCallbackEncode(converterHandle, this); 115 } 116 117 @Override protected void implReset() { 118 NativeConverter.resetCharToByte(converterHandle); 119 data[INPUT_OFFSET] = 0; 120 data[OUTPUT_OFFSET] = 0; 121 data[INVALID_CHARS] = 0; 122 output = null; 123 input = null; 124 allocatedInput = null; 125 allocatedOutput = null; 126 inEnd = 0; 127 outEnd = 0; 128 } 129 130 @Override protected CoderResult implFlush(ByteBuffer out) { 131 try { 132 // ICU needs to see an empty input. 133 input = EmptyArray.CHAR; 134 inEnd = 0; 135 data[INPUT_OFFSET] = 0; 136 137 data[OUTPUT_OFFSET] = getArray(out); 138 data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors. 139 140 int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true); 141 if (ICU.U_FAILURE(error)) { 142 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) { 143 return CoderResult.OVERFLOW; 144 } else if (error == ICU.U_TRUNCATED_CHAR_FOUND) { 145 if (data[INPUT_OFFSET] > 0) { 146 return CoderResult.malformedForLength(data[INPUT_OFFSET]); 147 } 148 } 149 } 150 return CoderResult.UNDERFLOW; 151 } finally { 152 setPosition(out); 153 implReset(); 154 } 155 } 156 157 @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { 158 if (!in.hasRemaining()) { 159 return CoderResult.UNDERFLOW; 160 } 161 162 data[INPUT_OFFSET] = getArray(in); 163 data[OUTPUT_OFFSET]= getArray(out); 164 data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors. 165 166 try { 167 int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false); 168 if (ICU.U_FAILURE(error)) { 169 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) { 170 return CoderResult.OVERFLOW; 171 } else if (error == ICU.U_INVALID_CHAR_FOUND) { 172 return CoderResult.unmappableForLength(data[INVALID_CHARS]); 173 } else if (error == ICU.U_ILLEGAL_CHAR_FOUND) { 174 return CoderResult.malformedForLength(data[INVALID_CHARS]); 175 } else { 176 throw new AssertionError(error); 177 } 178 } 179 // Decoding succeeded: give us more data. 180 return CoderResult.UNDERFLOW; 181 } finally { 182 setPosition(in); 183 setPosition(out); 184 } 185 } 186 187 @Override protected void finalize() throws Throwable { 188 try { 189 NativeConverter.closeConverter(converterHandle); 190 converterHandle=0; 191 } finally { 192 super.finalize(); 193 } 194 } 195 196 private int getArray(ByteBuffer out) { 197 if (out.hasArray()) { 198 output = out.array(); 199 outEnd = out.arrayOffset() + out.limit(); 200 return out.arrayOffset() + out.position(); 201 } else { 202 outEnd = out.remaining(); 203 if (allocatedOutput == null || outEnd > allocatedOutput.length) { 204 allocatedOutput = new byte[outEnd]; 205 } 206 // The array's start position is 0 207 output = allocatedOutput; 208 return 0; 209 } 210 } 211 212 private int getArray(CharBuffer in) { 213 if (in.hasArray()) { 214 input = in.array(); 215 inEnd = in.arrayOffset() + in.limit(); 216 return in.arrayOffset() + in.position(); 217 } else { 218 inEnd = in.remaining(); 219 if (allocatedInput == null || inEnd > allocatedInput.length) { 220 allocatedInput = new char[inEnd]; 221 } 222 // Copy the input buffer into the allocated array. 223 int pos = in.position(); 224 in.get(allocatedInput, 0, inEnd); 225 in.position(pos); 226 // The array's start position is 0 227 input = allocatedInput; 228 return 0; 229 } 230 } 231 232 private void setPosition(ByteBuffer out) { 233 if (out.hasArray()) { 234 out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset()); 235 } else { 236 out.put(output, 0, data[OUTPUT_OFFSET]); 237 } 238 // release reference to output array, which may not be ours 239 output = null; 240 } 241 242 private void setPosition(CharBuffer in) { 243 in.position(in.position() + data[INPUT_OFFSET] - data[INVALID_CHARS]); 244 // release reference to input array, which may not be ours 245 input = null; 246 } 247} 248