1/** 2******************************************************************************* 3* Copyright (C) 1996-2006, International Business Machines Corporation and * 4* others. All Rights Reserved. * 5******************************************************************************* 6* 7******************************************************************************* 8*/ 9/** 10 * A JNI interface for ICU converters. 11 * 12 * 13 * @author Ram Viswanadha, IBM 14 */ 15package com.ibm.icu4jni.charset; 16 17import java.nio.ByteBuffer; 18import java.nio.CharBuffer; 19import java.nio.charset.Charset; 20import java.nio.charset.CharsetEncoder; 21import java.nio.charset.CoderResult; 22import java.nio.charset.CodingErrorAction; 23 24import com.ibm.icu4jni.common.ErrorCode; 25// BEGIN android-removed 26// import com.ibm.icu4jni.converters.NativeConverter; 27// END android-removed 28 29public final class CharsetEncoderICU extends CharsetEncoder { 30 31 private static final int INPUT_OFFSET = 0, 32 OUTPUT_OFFSET = 1, 33 INVALID_CHARS = 2, 34 INPUT_HELD = 3, 35 LIMIT = 4; 36 /* data is 3 element array where 37 * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed 38 * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written 39 * data[INVALID_CHARS] = number of invalid chars 40 * data[INPUT_HELD] = number of input chars held in the converter's state 41 */ 42 private int[] data = new int[LIMIT]; 43 /* handle to the ICU converter that is opened */ 44 private long converterHandle=0; 45 46 private char[] input = null; 47 private byte[] output = null; 48 49 // BEGIN android-added 50 private char[] allocatedInput = null; 51 private byte[] allocatedOutput = null; 52 // END android-added 53 54 // These instance variables are 55 // always assigned in the methods 56 // before being used. This class 57 // inhrently multithread unsafe 58 // so we dont have to worry about 59 // synchronization 60 private int inEnd; 61 private int outEnd; 62 private int ec; 63 private int savedInputHeldLen; 64 private int onUnmappableInput = NativeConverter.STOP_CALLBACK;; 65 private int onMalformedInput = NativeConverter.STOP_CALLBACK;; 66 67 /** 68 * Construcs a new encoder for the given charset 69 * @param cs for which the decoder is created 70 * @param cHandle the address of ICU converter 71 * @param replacement the substitution bytes 72 * @stable ICU 2.4 73 */ 74 public CharsetEncoderICU(Charset cs, long cHandle, byte[] replacement) { 75 super( 76 cs, 77 (float) NativeConverter.getAveBytesPerChar(cHandle), 78 (float) NativeConverter.getMaxBytesPerChar(cHandle), 79 replacement); 80 byte[] sub = replacement(); 81 // The default callback action on unmappable input 82 // or malformed input is to ignore so we set ICU converter 83 // callback to stop and report the error 84 ec = NativeConverter.setCallbackEncode( cHandle, 85 onMalformedInput, 86 onUnmappableInput, 87 sub, sub.length); 88 converterHandle = cHandle; 89 if (ErrorCode.isFailure(ec)) { 90 throw ErrorCode.getException(ec); 91 } 92 } 93 94 /** 95 * Sets this encoders replacement string. Substitutes the string in output if an 96 * umappable or illegal sequence is encountered 97 * @param newReplacement to replace the error chars with 98 * @stable ICU 2.4 99 */ 100 protected void implReplaceWith(byte[] newReplacement) { 101 if (converterHandle != 0) { 102 if (newReplacement.length 103 > NativeConverter.getMaxBytesPerChar(converterHandle)) { 104 throw new IllegalArgumentException("Number of replacement Bytes are greater than max bytes per char"); 105 } 106 ec = NativeConverter.setSubstitutionBytes(converterHandle, 107 newReplacement, 108 newReplacement.length); 109 if (ErrorCode.isFailure(ec)) { 110 throw ErrorCode.getException(ec); 111 } 112 } 113 } 114 115 /** 116 * Sets the action to be taken if an illegal sequence is encountered 117 * @param newAction action to be taken 118 * @exception IllegalArgumentException 119 * @stable ICU 2.4 120 */ 121 protected void implOnMalformedInput(CodingErrorAction newAction) { 122 onMalformedInput = NativeConverter.STOP_CALLBACK; 123 124 if (newAction.equals(CodingErrorAction.IGNORE)) { 125 onMalformedInput = NativeConverter.SKIP_CALLBACK; 126 } else if (newAction.equals(CodingErrorAction.REPLACE)) { 127 onMalformedInput = NativeConverter.SUBSTITUTE_CALLBACK; 128 } 129 byte[] sub = replacement(); 130 ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length); 131 if (ErrorCode.isFailure(ec)) { 132 throw ErrorCode.getException(ec); 133 } 134 135 } 136 137 /** 138 * Sets the action to be taken if an illegal sequence is encountered 139 * @param newAction action to be taken 140 * @exception IllegalArgumentException 141 * @stable ICU 2.4 142 */ 143 protected void implOnUnmappableCharacter(CodingErrorAction newAction) { 144 onUnmappableInput = NativeConverter.STOP_CALLBACK; 145 146 if (newAction.equals(CodingErrorAction.IGNORE)) { 147 onUnmappableInput = NativeConverter.SKIP_CALLBACK; 148 } else if (newAction.equals(CodingErrorAction.REPLACE)) { 149 onUnmappableInput = NativeConverter.SUBSTITUTE_CALLBACK; 150 } 151 byte[] sub = replacement(); 152 ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length); 153 if (ErrorCode.isFailure(ec)) { 154 throw ErrorCode.getException(ec); 155 } 156 } 157 158 /** 159 * Flushes any characters saved in the converter's internal buffer and 160 * resets the converter. 161 * @param out action to be taken 162 * @return result of flushing action and completes the decoding all input. 163 * Returns CoderResult.UNDERFLOW if the action succeeds. 164 * @stable ICU 2.4 165 */ 166 protected CoderResult implFlush(ByteBuffer out) { 167 try { 168 data[OUTPUT_OFFSET] = getArray(out); 169 ec = NativeConverter.flushCharToByte(converterHandle,/* Handle to ICU Converter */ 170 output, /* output array of chars */ 171 outEnd, /* output index+1 to be written */ 172 data /* contains data, inOff,outOff */ 173 ); 174 175 /* If we don't have room for the output, throw an exception*/ 176 if (ErrorCode.isFailure(ec)) { 177 if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) { 178 return CoderResult.OVERFLOW; 179 }else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling 180 if(data[INPUT_OFFSET]>0){ 181 return CoderResult.malformedForLength(data[INPUT_OFFSET]); 182 } 183 }else { 184 ErrorCode.getException(ec); 185 } 186 } 187 return CoderResult.UNDERFLOW; 188 } finally { 189 setPosition(out); 190 implReset(); 191 } 192 } 193 194 /** 195 * Resets the from Unicode mode of converter 196 * @stable ICU 2.4 197 */ 198 protected void implReset() { 199 NativeConverter.resetCharToByte(converterHandle); 200 data[INPUT_OFFSET] = 0; 201 data[OUTPUT_OFFSET] = 0; 202 data[INVALID_CHARS] = 0; 203 data[INPUT_HELD] = 0; 204 savedInputHeldLen = 0; 205 } 206 207 /** 208 * Encodes one or more chars. The default behaviour of the 209 * converter is stop and report if an error in input stream is encountered. 210 * To set different behaviour use @see CharsetEncoder.onMalformedInput() 211 * @param in buffer to decode 212 * @param out buffer to populate with decoded result 213 * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding 214 * action succeeds or more input is needed for completing the decoding action. 215 * @stable ICU 2.4 216 */ 217 protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { 218 219 if (!in.hasRemaining()) { 220 return CoderResult.UNDERFLOW; 221 } 222 223 data[INPUT_OFFSET] = getArray(in); 224 data[OUTPUT_OFFSET]= getArray(out); 225 data[INPUT_HELD] = 0; 226 // BEGIN android-added 227 data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors. 228 // END android added 229 230 try { 231 /* do the conversion */ 232 ec = NativeConverter.encode(converterHandle,/* Handle to ICU Converter */ 233 input, /* input array of bytes */ 234 inEnd, /* last index+1 to be converted */ 235 output, /* output array of chars */ 236 outEnd, /* output index+1 to be written */ 237 data, /* contains data, inOff,outOff */ 238 false /* donot flush the data */ 239 ); 240 if (ErrorCode.isFailure(ec)) { 241 /* If we don't have room for the output return error */ 242 if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) { 243 return CoderResult.OVERFLOW; 244 } else if (ec == ErrorCode.U_INVALID_CHAR_FOUND) { 245 return CoderResult.unmappableForLength(data[INVALID_CHARS]); 246 } else if (ec == ErrorCode.U_ILLEGAL_CHAR_FOUND) { 247 // in.position(in.position() - 1); 248 return CoderResult.malformedForLength(data[INVALID_CHARS]); 249 } 250 } 251 return CoderResult.UNDERFLOW; 252 } finally { 253 /* save state */ 254 setPosition(in); 255 setPosition(out); 256 } 257 } 258 259 /** 260 * Ascertains if a given Unicode character can 261 * be converted to the target encoding 262 * 263 * @param c the character to be converted 264 * @return true if a character can be converted 265 * @stable ICU 2.4 266 * 267 */ 268 public boolean canEncode(char c) { 269 return canEncode((int) c); 270 } 271 272 /** 273 * Ascertains if a given Unicode code point (32bit value for handling surrogates) 274 * can be converted to the target encoding. If the caller wants to test if a 275 * surrogate pair can be converted to target encoding then the 276 * responsibility of assembling the int value lies with the caller. 277 * For assembling a code point the caller can use UTF16 class of ICU4J and do something like: 278 * <pre> 279 * while(i<mySource.length){ 280 * if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){ 281 * if(UTF16.isTrailSurrogate(mySource[i+1])){ 282 * int temp = UTF16.charAt(mySource,i,i+1,0); 283 * if(!((CharsetEncoderICU) myConv).canEncode(temp)){ 284 * passed=false; 285 * } 286 * i++; 287 * i++; 288 * } 289 * } 290 * } 291 * </pre> 292 * or 293 * <pre> 294 * String src = new String(mySource); 295 * int i,codepoint; 296 * boolean passed = false; 297 * while(i<src.length()){ 298 * codepoint = UTF16.charAt(src,i); 299 * i+= (codepoint>0xfff)? 2:1; 300 * if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){ 301 * passed = false; 302 * } 303 * } 304 * </pre> 305 * 306 * @param codepoint Unicode code point as int value 307 * @return true if a character can be converted 308 * @obsolete ICU 2.4 309 * @deprecated ICU 3.4 310 */ 311 public boolean canEncode(int codepoint) { 312 return NativeConverter.canEncode(converterHandle, codepoint); 313 } 314 315 /** 316 * Releases the system resources by cleanly closing ICU converter opened 317 * @exception Throwable exception thrown by super class' finalize method 318 * @stable ICU 2.4 319 */ 320 protected void finalize() throws Throwable { 321 NativeConverter.closeConverter(converterHandle); 322 super.finalize(); 323 converterHandle=0; 324 } 325 326 //------------------------------------------ 327 // private utility methods 328 //------------------------------------------ 329 private final int getArray(ByteBuffer out) { 330 if(out.hasArray()){ 331 // BEGIN android-changed: take arrayOffset into account 332 output = out.array(); 333 outEnd = out.arrayOffset() + out.limit(); 334 return out.arrayOffset() + out.position(); 335 // END android-changed 336 }else{ 337 outEnd = out.remaining(); 338 // BEGIN android-added 339 if (allocatedOutput == null || (outEnd > allocatedOutput.length)) { 340 allocatedOutput = new byte[outEnd]; 341 } 342 output = allocatedOutput; 343 // END android-added 344 //since the new 345 // buffer start position 346 // is 0 347 return 0; 348 } 349 } 350 351 private final int getArray(CharBuffer in) { 352 if(in.hasArray()){ 353 // BEGIN android-changed: take arrayOffset into account 354 input = in.array(); 355 inEnd = in.arrayOffset() + in.limit(); 356 return in.arrayOffset() + in.position() + savedInputHeldLen;/*exclude the number fo bytes held in previous conversion*/ 357 // END android-changed 358 }else{ 359 inEnd = in.remaining(); 360 // BEGIN android-added 361 if (allocatedInput == null || (inEnd > allocatedInput.length)) { 362 allocatedInput = new char[inEnd]; 363 } 364 input = allocatedInput; 365 // END android-added 366 // save the current position 367 int pos = in.position(); 368 in.get(input,0,inEnd); 369 // reset the position 370 in.position(pos); 371 // the start position 372 // of the new buffer 373 // is whatever is savedInputLen 374 return savedInputHeldLen; 375 } 376 377 } 378 private final void setPosition(ByteBuffer out) { 379 380 if (out.hasArray()) { 381 // in getArray method we accessed the 382 // array backing the buffer directly and wrote to 383 // it, so just just set the position and return. 384 // This is done to avoid the creation of temp array. 385 // BEGIN android-changed: take arrayOffset into account 386 out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset()); 387 // END android-changed 388 } else { 389 out.put(output, 0, data[OUTPUT_OFFSET]); 390 } 391 // BEGIN android-added 392 // release reference to output array, which may not be ours 393 output = null; 394 // END android-added 395 } 396 private final void setPosition(CharBuffer in){ 397 398// BEGIN android-removed 399// // was there input held in the previous invocation of encodeLoop 400// // that resulted in output in this invocation? 401// if(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0){ 402// int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen; 403// in.position(len); 404// savedInputHeldLen = data[INPUT_HELD]; 405// }else{ 406// in.position(in.position() + data[INPUT_OFFSET] + savedInputHeldLen); 407// savedInputHeldLen = data[INPUT_HELD]; 408// in.position(in.position() - savedInputHeldLen); 409// } 410// END android-removed 411 412// BEGIN android-added 413 // Slightly rewired original code to make it cleaner. Also 414 // added a fix for the problem where input charatcers got 415 // lost when invalid characters were encountered. Not sure 416 // what happens when data[INVALID_CHARS] is > 1, though, 417 // since we never saw that happening. 418 int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen; 419 len -= data[INVALID_CHARS]; // Otherwise position becomes wrong. 420 in.position(len); 421 savedInputHeldLen = data[INPUT_HELD]; 422 // was there input held in the previous invocation of encodeLoop 423 // that resulted in output in this invocation? 424 if(!(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0)){ 425 in.position(in.position() - savedInputHeldLen); 426 } 427// END android-added 428 429 // BEGIN android-added 430 // release reference to input array, which may not be ours 431 input = null; 432 // END android-added 433 } 434} 435