1/**
2*******************************************************************************
3* Copyright (C) 1996-2006, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                  *
5*******************************************************************************
6*
7*******************************************************************************
8*/
9/**
10 * A JNI interface for ICU converters.
11 *
12 *
13 * @author Ram Viswanadha, IBM
14 */
15package com.ibm.icu4jni.charset;
16
17import java.nio.ByteBuffer;
18import java.nio.CharBuffer;
19import java.nio.charset.Charset;
20import java.nio.charset.CharsetEncoder;
21import java.nio.charset.CoderResult;
22import java.nio.charset.CodingErrorAction;
23
24import com.ibm.icu4jni.common.ErrorCode;
25// BEGIN android-removed
26// import com.ibm.icu4jni.converters.NativeConverter;
27// END android-removed
28
29public final class CharsetEncoderICU extends CharsetEncoder {
30
31    private static final int INPUT_OFFSET = 0,
32                             OUTPUT_OFFSET = 1,
33                             INVALID_CHARS  = 2,
34                             INPUT_HELD     = 3,
35                             LIMIT          = 4;
36    /* data is 3 element array where
37     * data[INPUT_OFFSET]   = on input contains the start of input and on output the number of input chars consumed
38     * data[OUTPUT_OFFSET]  = on input contains the start of output and on output the number of output bytes written
39     * data[INVALID_CHARS]  = number of invalid chars
40     * data[INPUT_HELD]     = number of input chars held in the converter's state
41     */
42    private int[] data = new int[LIMIT];
43    /* handle to the ICU converter that is opened */
44    private long converterHandle=0;
45
46    private char[] input = null;
47    private byte[] output = null;
48
49    // BEGIN android-added
50    private char[] allocatedInput = null;
51    private byte[] allocatedOutput = null;
52    // END android-added
53
54    // These instance variables are
55    // always assigned in the methods
56    // before being used. This class
57    // inhrently multithread unsafe
58    // so we dont have to worry about
59    // synchronization
60    private int inEnd;
61    private int outEnd;
62    private int ec;
63    private int savedInputHeldLen;
64    private int onUnmappableInput = NativeConverter.STOP_CALLBACK;;
65    private int onMalformedInput = NativeConverter.STOP_CALLBACK;;
66
67    /**
68     * Construcs a new encoder for the given charset
69     * @param cs for which the decoder is created
70     * @param cHandle the address of ICU converter
71     * @param replacement the substitution bytes
72     * @stable ICU 2.4
73     */
74    public CharsetEncoderICU(Charset cs, long cHandle, byte[] replacement) {
75        super(
76            cs,
77            (float) NativeConverter.getAveBytesPerChar(cHandle),
78            (float) NativeConverter.getMaxBytesPerChar(cHandle),
79            replacement);
80        byte[] sub = replacement();
81        // The default callback action on unmappable input
82        // or malformed input is to ignore so we set ICU converter
83        // callback to stop and report the error
84        ec = NativeConverter.setCallbackEncode( cHandle,
85                                                onMalformedInput,
86                                                onUnmappableInput,
87                                                sub, sub.length);
88        converterHandle = cHandle;
89        if (ErrorCode.isFailure(ec)) {
90            throw ErrorCode.getException(ec);
91        }
92    }
93
94    /**
95     * Sets this encoders replacement string. Substitutes the string in output if an
96     * umappable or illegal sequence is encountered
97     * @param newReplacement to replace the error chars with
98     * @stable ICU 2.4
99     */
100    protected void implReplaceWith(byte[] newReplacement) {
101        if (converterHandle != 0) {
102            if (newReplacement.length
103                > NativeConverter.getMaxBytesPerChar(converterHandle)) {
104                throw new IllegalArgumentException("Number of replacement Bytes are greater than max bytes per char");
105            }
106            ec = NativeConverter.setSubstitutionBytes(converterHandle,
107                                                      newReplacement,
108                                                      newReplacement.length);
109            if (ErrorCode.isFailure(ec)) {
110                throw ErrorCode.getException(ec);
111            }
112        }
113    }
114
115    /**
116     * Sets the action to be taken if an illegal sequence is encountered
117     * @param newAction action to be taken
118     * @exception IllegalArgumentException
119     * @stable ICU 2.4
120     */
121    protected void implOnMalformedInput(CodingErrorAction newAction) {
122        onMalformedInput = NativeConverter.STOP_CALLBACK;
123
124        if (newAction.equals(CodingErrorAction.IGNORE)) {
125            onMalformedInput = NativeConverter.SKIP_CALLBACK;
126        } else if (newAction.equals(CodingErrorAction.REPLACE)) {
127            onMalformedInput = NativeConverter.SUBSTITUTE_CALLBACK;
128        }
129        byte[] sub = replacement();
130        ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length);
131        if (ErrorCode.isFailure(ec)) {
132            throw ErrorCode.getException(ec);
133        }
134
135    }
136
137    /**
138     * Sets the action to be taken if an illegal sequence is encountered
139     * @param newAction action to be taken
140     * @exception IllegalArgumentException
141     * @stable ICU 2.4
142     */
143    protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
144        onUnmappableInput = NativeConverter.STOP_CALLBACK;
145
146        if (newAction.equals(CodingErrorAction.IGNORE)) {
147            onUnmappableInput = NativeConverter.SKIP_CALLBACK;
148        } else if (newAction.equals(CodingErrorAction.REPLACE)) {
149            onUnmappableInput = NativeConverter.SUBSTITUTE_CALLBACK;
150        }
151        byte[] sub = replacement();
152        ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length);
153        if (ErrorCode.isFailure(ec)) {
154            throw ErrorCode.getException(ec);
155        }
156    }
157
158    /**
159     * Flushes any characters saved in the converter's internal buffer and
160     * resets the converter.
161     * @param out action to be taken
162     * @return result of flushing action and completes the decoding all input.
163     *       Returns CoderResult.UNDERFLOW if the action succeeds.
164     * @stable ICU 2.4
165     */
166    protected CoderResult implFlush(ByteBuffer out) {
167        try {
168            data[OUTPUT_OFFSET] = getArray(out);
169            ec = NativeConverter.flushCharToByte(converterHandle,/* Handle to ICU Converter */
170                                                 output, /* output array of chars */
171                                                 outEnd, /* output index+1 to be written */
172                                                 data /* contains data, inOff,outOff */
173                                                );
174
175            /* If we don't have room for the output, throw an exception*/
176            if (ErrorCode.isFailure(ec)) {
177                if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) {
178                    return CoderResult.OVERFLOW;
179                }else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling
180                    if(data[INPUT_OFFSET]>0){
181                        return CoderResult.malformedForLength(data[INPUT_OFFSET]);
182                    }
183                }else {
184                    ErrorCode.getException(ec);
185                }
186            }
187            return CoderResult.UNDERFLOW;
188        } finally {
189            setPosition(out);
190            implReset();
191        }
192    }
193
194    /**
195     * Resets the from Unicode mode of converter
196     * @stable ICU 2.4
197     */
198    protected void implReset() {
199        NativeConverter.resetCharToByte(converterHandle);
200        data[INPUT_OFFSET] = 0;
201        data[OUTPUT_OFFSET] = 0;
202        data[INVALID_CHARS] = 0;
203        data[INPUT_HELD] = 0;
204        savedInputHeldLen = 0;
205    }
206
207    /**
208     * Encodes one or more chars. The default behaviour of the
209     * converter is stop and report if an error in input stream is encountered.
210     * To set different behaviour use @see CharsetEncoder.onMalformedInput()
211     * @param in buffer to decode
212     * @param out buffer to populate with decoded result
213     * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
214     *       action succeeds or more input is needed for completing the decoding action.
215     * @stable ICU 2.4
216     */
217    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
218
219        if (!in.hasRemaining()) {
220            return CoderResult.UNDERFLOW;
221        }
222
223        data[INPUT_OFFSET] = getArray(in);
224        data[OUTPUT_OFFSET]= getArray(out);
225        data[INPUT_HELD] = 0;
226        // BEGIN android-added
227        data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors.
228        // END android added
229
230        try {
231            /* do the conversion */
232            ec = NativeConverter.encode(converterHandle,/* Handle to ICU Converter */
233                                        input, /* input array of bytes */
234                                        inEnd, /* last index+1 to be converted */
235                                        output, /* output array of chars */
236                                        outEnd, /* output index+1 to be written */
237                                        data, /* contains data, inOff,outOff */
238                                        false /* donot flush the data */
239                                        );
240            if (ErrorCode.isFailure(ec)) {
241                /* If we don't have room for the output return error */
242                if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) {
243                    return CoderResult.OVERFLOW;
244                } else if (ec == ErrorCode.U_INVALID_CHAR_FOUND) {
245                    return CoderResult.unmappableForLength(data[INVALID_CHARS]);
246                } else if (ec == ErrorCode.U_ILLEGAL_CHAR_FOUND) {
247                    // in.position(in.position() - 1);
248                    return CoderResult.malformedForLength(data[INVALID_CHARS]);
249                }
250            }
251            return CoderResult.UNDERFLOW;
252        } finally {
253            /* save state */
254            setPosition(in);
255            setPosition(out);
256        }
257    }
258
259    /**
260     * Ascertains if a given Unicode character can
261     * be converted to the target encoding
262     *
263     * @param  c the character to be converted
264     * @return true if a character can be converted
265     * @stable ICU 2.4
266     *
267     */
268    public boolean canEncode(char c) {
269        return canEncode((int) c);
270    }
271
272    /**
273     * Ascertains if a given Unicode code point (32bit value for handling surrogates)
274     * can be converted to the target encoding. If the caller wants to test if a
275     * surrogate pair can be converted to target encoding then the
276     * responsibility of assembling the int value lies with the caller.
277     * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
278     * <pre>
279     * while(i<mySource.length){
280     *      if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
281     *          if(UTF16.isTrailSurrogate(mySource[i+1])){
282     *              int temp = UTF16.charAt(mySource,i,i+1,0);
283     *              if(!((CharsetEncoderICU) myConv).canEncode(temp)){
284     *          passed=false;
285     *              }
286     *              i++;
287     *              i++;
288     *          }
289     *     }
290     * }
291     * </pre>
292     * or
293     * <pre>
294     * String src = new String(mySource);
295     * int i,codepoint;
296     * boolean passed = false;
297     * while(i<src.length()){
298     *    codepoint = UTF16.charAt(src,i);
299     *    i+= (codepoint>0xfff)? 2:1;
300     *    if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
301     *        passed = false;
302     *    }
303     * }
304     * </pre>
305     *
306     * @param codepoint Unicode code point as int value
307     * @return true if a character can be converted
308     * @obsolete ICU 2.4
309     * @deprecated ICU 3.4
310     */
311    public boolean canEncode(int codepoint) {
312        return NativeConverter.canEncode(converterHandle, codepoint);
313    }
314
315    /**
316     * Releases the system resources by cleanly closing ICU converter opened
317     * @exception Throwable exception thrown by super class' finalize method
318     * @stable ICU 2.4
319     */
320    protected void finalize() throws Throwable {
321        NativeConverter.closeConverter(converterHandle);
322        super.finalize();
323        converterHandle=0;
324    }
325
326    //------------------------------------------
327    // private utility methods
328    //------------------------------------------
329    private final int getArray(ByteBuffer out) {
330        if(out.hasArray()){
331            // BEGIN android-changed: take arrayOffset into account
332            output = out.array();
333            outEnd = out.arrayOffset() + out.limit();
334            return out.arrayOffset() + out.position();
335            // END android-changed
336        }else{
337            outEnd = out.remaining();
338            // BEGIN android-added
339            if (allocatedOutput == null || (outEnd > allocatedOutput.length)) {
340                allocatedOutput = new byte[outEnd];
341            }
342            output = allocatedOutput;
343            // END android-added
344            //since the new
345            // buffer start position
346            // is 0
347            return 0;
348        }
349    }
350
351    private final int getArray(CharBuffer in) {
352        if(in.hasArray()){
353            // BEGIN android-changed: take arrayOffset into account
354            input = in.array();
355            inEnd = in.arrayOffset() + in.limit();
356            return in.arrayOffset() + in.position() + savedInputHeldLen;/*exclude the number fo bytes held in previous conversion*/
357            // END android-changed
358        }else{
359            inEnd = in.remaining();
360            // BEGIN android-added
361            if (allocatedInput == null || (inEnd > allocatedInput.length)) {
362                allocatedInput = new char[inEnd];
363            }
364            input = allocatedInput;
365            // END android-added
366            // save the current position
367            int pos = in.position();
368            in.get(input,0,inEnd);
369            // reset the position
370            in.position(pos);
371            // the start position
372            // of the new buffer
373            // is whatever is savedInputLen
374            return savedInputHeldLen;
375        }
376
377    }
378    private final void setPosition(ByteBuffer out) {
379
380        if (out.hasArray()) {
381            // in getArray method we accessed the
382            // array backing the buffer directly and wrote to
383            // it, so just just set the position and return.
384            // This is done to avoid the creation of temp array.
385            // BEGIN android-changed: take arrayOffset into account
386            out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset());
387            // END android-changed
388        } else {
389            out.put(output, 0, data[OUTPUT_OFFSET]);
390        }
391        // BEGIN android-added
392        // release reference to output array, which may not be ours
393        output = null;
394        // END android-added
395    }
396    private final void setPosition(CharBuffer in){
397
398// BEGIN android-removed
399//        // was there input held in the previous invocation of encodeLoop
400//        // that resulted in output in this invocation?
401//        if(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0){
402//            int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen;
403//            in.position(len);
404//            savedInputHeldLen = data[INPUT_HELD];
405//        }else{
406//            in.position(in.position() + data[INPUT_OFFSET] + savedInputHeldLen);
407//            savedInputHeldLen = data[INPUT_HELD];
408//            in.position(in.position() - savedInputHeldLen);
409//        }
410// END android-removed
411
412// BEGIN android-added
413        // Slightly rewired original code to make it cleaner. Also
414        // added a fix for the problem where input charatcers got
415        // lost when invalid characters were encountered. Not sure
416        // what happens when data[INVALID_CHARS] is > 1, though,
417        // since we never saw that happening.
418        int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen;
419        len -= data[INVALID_CHARS]; // Otherwise position becomes wrong.
420        in.position(len);
421        savedInputHeldLen = data[INPUT_HELD];
422        // was there input held in the previous invocation of encodeLoop
423        // that resulted in output in this invocation?
424        if(!(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0)){
425            in.position(in.position() - savedInputHeldLen);
426        }
427// END android-added
428
429        // BEGIN android-added
430        // release reference to input array, which may not be ours
431        input = null;
432        // END android-added
433    }
434}
435