1/**
2*******************************************************************************
3* Copyright (C) 1996-2006, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                  *
5*******************************************************************************
6*
7*******************************************************************************
8*/
9/**
10 * A JNI interface for ICU converters.
11 *
12 *
13 * @author Ram Viswanadha, IBM
14 */
15package java.nio.charset;
16
17import java.nio.ByteBuffer;
18import java.nio.CharBuffer;
19import java.util.HashMap;
20import java.util.Map;
21import libcore.icu.ICU;
22import libcore.icu.NativeConverter;
23import libcore.util.EmptyArray;
24
25final class CharsetEncoderICU extends CharsetEncoder {
26    private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>();
27    static {
28        // ICU has different default replacements to the RI in some cases. There are many
29        // additional cases, but this covers all the charsets that Java guarantees will be
30        // available, which is where compatibility seems most important. (The RI even uses
31        // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that
32        // byte corresponds to an entirely different character.)
33        // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it
34        // can represent it, but this is what the RI does...
35        byte[] questionMark = new byte[] { (byte) '?' };
36        DEFAULT_REPLACEMENTS.put("UTF-8",      questionMark);
37        DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark);
38        DEFAULT_REPLACEMENTS.put("US-ASCII",   questionMark);
39    }
40
41    private static final int INPUT_OFFSET = 0;
42    private static final int OUTPUT_OFFSET = 1;
43    private static final int INVALID_CHARS = 2;
44    /*
45     * data[INPUT_OFFSET]   = on input contains the start of input and on output the number of input chars consumed
46     * data[OUTPUT_OFFSET]  = on input contains the start of output and on output the number of output bytes written
47     * data[INVALID_CHARS]  = number of invalid chars
48     */
49    private int[] data = new int[3];
50
51    /* handle to the ICU converter that is opened */
52    private long converterHandle=0;
53
54    private char[] input = null;
55    private byte[] output = null;
56
57    private char[] allocatedInput = null;
58    private byte[] allocatedOutput = null;
59
60    // These instance variables are always assigned in the methods before being used. This class
61    // is inherently thread-unsafe so we don't have to worry about synchronization.
62    private int inEnd;
63    private int outEnd;
64
65    public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) {
66        // This complexity is necessary to ensure that even if the constructor, superclass
67        // constructor, or call to updateCallback throw, we still free the native peer.
68        long address = 0;
69        try {
70            address = NativeConverter.openConverter(icuCanonicalName);
71            float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address);
72            float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address);
73            byte[] replacement = makeReplacement(icuCanonicalName, address);
74            CharsetEncoderICU result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address);
75            address = 0; // CharsetEncoderICU has taken ownership; its finalizer will do the free.
76            return result;
77        } finally {
78            if (address != 0) {
79                NativeConverter.closeConverter(address);
80            }
81        }
82    }
83
84    private static byte[] makeReplacement(String icuCanonicalName, long address) {
85        // We have our own map of RI-compatible default replacements (where ICU disagrees)...
86        byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName);
87        if (replacement != null) {
88            return replacement.clone();
89        }
90        // ...but fall back to asking ICU.
91        return NativeConverter.getSubstitutionBytes(address);
92    }
93
94    private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) {
95        super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true);
96        // Our native peer needs to know what just happened...
97        this.converterHandle = address;
98        updateCallback();
99    }
100
101    @Override protected void implReplaceWith(byte[] newReplacement) {
102        updateCallback();
103    }
104
105    @Override protected void implOnMalformedInput(CodingErrorAction newAction) {
106        updateCallback();
107    }
108
109    @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
110        updateCallback();
111    }
112
113    private void updateCallback() {
114        NativeConverter.setCallbackEncode(converterHandle, this);
115    }
116
117    @Override protected void implReset() {
118        NativeConverter.resetCharToByte(converterHandle);
119        data[INPUT_OFFSET] = 0;
120        data[OUTPUT_OFFSET] = 0;
121        data[INVALID_CHARS] = 0;
122        output = null;
123        input = null;
124        allocatedInput = null;
125        allocatedOutput = null;
126        inEnd = 0;
127        outEnd = 0;
128    }
129
130    @Override protected CoderResult implFlush(ByteBuffer out) {
131        try {
132            // ICU needs to see an empty input.
133            input = EmptyArray.CHAR;
134            inEnd = 0;
135            data[INPUT_OFFSET] = 0;
136
137            data[OUTPUT_OFFSET] = getArray(out);
138            data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors.
139
140            int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true);
141            if (ICU.U_FAILURE(error)) {
142                if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
143                    return CoderResult.OVERFLOW;
144                } else if (error == ICU.U_TRUNCATED_CHAR_FOUND) {
145                    if (data[INPUT_OFFSET] > 0) {
146                        return CoderResult.malformedForLength(data[INPUT_OFFSET]);
147                    }
148                }
149            }
150            return CoderResult.UNDERFLOW;
151        } finally {
152            setPosition(out);
153            implReset();
154        }
155    }
156
157    @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
158        if (!in.hasRemaining()) {
159            return CoderResult.UNDERFLOW;
160        }
161
162        data[INPUT_OFFSET] = getArray(in);
163        data[OUTPUT_OFFSET]= getArray(out);
164        data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors.
165
166        try {
167            int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false);
168            if (ICU.U_FAILURE(error)) {
169                if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
170                    return CoderResult.OVERFLOW;
171                } else if (error == ICU.U_INVALID_CHAR_FOUND) {
172                    return CoderResult.unmappableForLength(data[INVALID_CHARS]);
173                } else if (error == ICU.U_ILLEGAL_CHAR_FOUND) {
174                    return CoderResult.malformedForLength(data[INVALID_CHARS]);
175                } else {
176                    throw new AssertionError(error);
177                }
178            }
179            // Decoding succeeded: give us more data.
180            return CoderResult.UNDERFLOW;
181        } finally {
182            setPosition(in);
183            setPosition(out);
184        }
185    }
186
187    @Override protected void finalize() throws Throwable {
188        try {
189            NativeConverter.closeConverter(converterHandle);
190            converterHandle=0;
191        } finally {
192            super.finalize();
193        }
194    }
195
196    private int getArray(ByteBuffer out) {
197        if (out.hasArray()) {
198            output = out.array();
199            outEnd = out.arrayOffset() + out.limit();
200            return out.arrayOffset() + out.position();
201        } else {
202            outEnd = out.remaining();
203            if (allocatedOutput == null || outEnd > allocatedOutput.length) {
204                allocatedOutput = new byte[outEnd];
205            }
206            // The array's start position is 0
207            output = allocatedOutput;
208            return 0;
209        }
210    }
211
212    private int getArray(CharBuffer in) {
213        if (in.hasArray()) {
214            input = in.array();
215            inEnd = in.arrayOffset() + in.limit();
216            return in.arrayOffset() + in.position();
217        } else {
218            inEnd = in.remaining();
219            if (allocatedInput == null || inEnd > allocatedInput.length) {
220                allocatedInput = new char[inEnd];
221            }
222            // Copy the input buffer into the allocated array.
223            int pos = in.position();
224            in.get(allocatedInput, 0, inEnd);
225            in.position(pos);
226            // The array's start position is 0
227            input = allocatedInput;
228            return 0;
229        }
230    }
231
232    private void setPosition(ByteBuffer out) {
233        if (out.hasArray()) {
234            out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset());
235        } else {
236            out.put(output, 0, data[OUTPUT_OFFSET]);
237        }
238        // release reference to output array, which may not be ours
239        output = null;
240    }
241
242    private void setPosition(CharBuffer in) {
243        in.position(in.position() + data[INPUT_OFFSET] - data[INVALID_CHARS]);
244        // release reference to input array, which may not be ours
245        input = null;
246    }
247}
248