1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package java.nio.charset;
18
19import java.nio.BufferOverflowException;
20import java.nio.BufferUnderflowException;
21import java.nio.ByteBuffer;
22import java.nio.CharBuffer;
23import java.util.Arrays;
24
25/**
26 * Transforms a sequence of 16-bit Java characters to a byte sequence in some encoding.
27 *
28 * <p>The input character sequence is a {@link java.nio.CharBuffer CharBuffer} and the
29 * output byte sequence is a {@link java.nio.ByteBuffer ByteBuffer}.
30 *
31 * <p>Use {@link #encode(CharBuffer)} to encode an entire {@code CharBuffer} to a
32 * new {@code ByteBuffer}, or {@link #encode(CharBuffer, ByteBuffer, boolean)} for more
33 * control. When using the latter method, the entire operation proceeds as follows:
34 * <ol>
35 * <li>Invoke {@link #reset()} to reset the encoder if this instance has been used before.</li>
36 * <li>Invoke {@link #encode(CharBuffer, ByteBuffer, boolean) encode} with the {@code endOfInput}
37 * parameter set to false until additional input is not needed (as signaled by the return value).
38 * The input buffer must be filled and the output buffer must be flushed between invocations.
39 * <p>The {@link #encode(CharBuffer, ByteBuffer, boolean) encode} method will
40 * convert as many characters as possible, and the process won't stop until the
41 * input buffer has been exhausted, the output buffer has been filled, or an
42 * error has occurred. A {@link CoderResult CoderResult} instance will be
 * returned to indicate the current state. The caller should fill the input buffer, flush
 * the output buffer, or recover from the error, and then try again accordingly.
45 * </li>
46 * <li>Invoke {@link #encode(CharBuffer, ByteBuffer, boolean) encode} for the last time with
47 * {@code endOfInput} set to true.</li>
48 * <li>Invoke {@link #flush(ByteBuffer)} to flush remaining output.</li>
49 * </ol>
50 *
51 * <p>There are two classes of encoding error: <i>malformed input</i>
52 * signifies that the input character sequence is not legal, while <i>unmappable character</i>
53 * signifies that the input is legal but cannot be mapped to a byte sequence (because the charset
54 * cannot represent the character, for example).
55 *
56 * <p>Errors can be handled in three ways. The default is to
57 * {@link CodingErrorAction#REPORT report} the error to the caller. The alternatives are to
58 * {@link CodingErrorAction#IGNORE ignore} the error or {@link CodingErrorAction#REPLACE replace}
59 * the problematic input with the byte sequence returned by {@link #replacement}. The disposition
60 * for each of the two kinds of error can be set independently using the {@link #onMalformedInput}
61 * and {@link #onUnmappableCharacter} methods.
62 *
63 * <p>The default replacement bytes depend on the charset but can be overridden using the
64 * {@link #replaceWith} method.
65 *
66 * <p>This class is abstract and encapsulates many common operations of the
67 * encoding process for all charsets. Encoders for a specific charset should
68 * extend this class and need only to implement the
69 * {@link #encodeLoop(CharBuffer, ByteBuffer) encodeLoop} method for basic
70 * encoding. If a subclass maintains internal state, it should also override the
71 * {@link #implFlush(ByteBuffer) implFlush} and {@link #implReset() implReset} methods.
72 *
73 * <p>This class is not thread-safe.
74 *
75 * @see java.nio.charset.Charset
76 * @see java.nio.charset.CharsetDecoder
77 */
public abstract class CharsetEncoder {
    /**
     * The phases of one encoding operation. The constant names appear verbatim in the
     * messages of the {@code IllegalStateException}s thrown by this class.
     */
    private enum State {
        /** Freshly constructed, or {@link #reset()} has been called. */
        RESET,
        /** {@code encode} was last called with {@code endOfInput == false}. */
        ONGOING,
        /** {@code encode} was last called with {@code endOfInput == true}. */
        END_OF_INPUT,
        /** {@link #flush(ByteBuffer)} has completed with {@code UNDERFLOW}. */
        FLUSHED
    }

    private final Charset charset;

    private final float averageBytesPerChar;
    private final float maxBytesPerChar;

    private byte[] replacementBytes;

    private State state = State.RESET;

    private CodingErrorAction malformedInputAction = CodingErrorAction.REPORT;
    private CodingErrorAction unmappableCharacterAction = CodingErrorAction.REPORT;

    // Decoder instance for this encoder's charset, created lazily and used only for
    // replacement value checking in isLegalReplacement.
    private CharsetDecoder decoder;

    /**
     * Constructs a new {@code CharsetEncoder} using the given parameters and
     * the replacement byte array {@code { (byte) '?' }}.
     *
     * @param cs
     *            the {@code Charset} to be used by this encoder.
     * @param averageBytesPerChar
     *            average number of bytes created by this encoder for one single
     *            input character, must be positive.
     * @param maxBytesPerChar
     *            maximum number of bytes which can be created by this encoder
     *            for one single input character, must be positive.
     * @throws IllegalArgumentException
     *             if any parameters are invalid.
     */
    protected CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar) {
        this(cs, averageBytesPerChar, maxBytesPerChar, new byte[] { (byte) '?' });
    }

    /**
     * Constructs a new {@code CharsetEncoder} using the given
     * {@code Charset}, replacement byte array, average number and
     * maximum number of bytes created by this encoder for one input character.
     *
     * @param cs
     *            the {@code Charset} to be used by this encoder.
     * @param averageBytesPerChar
     *            average number of bytes created by this encoder for one single
     *            input character, must be positive.
     * @param maxBytesPerChar
     *            maximum number of bytes which can be created by this encoder
     *            for one single input character, must be positive.
     * @param replacement
     *            the replacement byte array, cannot be null or empty, its
     *            length cannot be larger than {@code maxBytesPerChar},
     *            and must be a legal replacement, which can be justified by
     *            {@link #isLegalReplacement(byte[]) isLegalReplacement}.
     * @throws IllegalArgumentException
     *             if any parameters are invalid.
     */
    protected CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement) {
        this(cs, averageBytesPerChar, maxBytesPerChar, replacement, false);
    }

    /**
     * Package-private constructor that can skip replacement validation.
     *
     * @param trusted
     *            if true, {@code replacement} is stored as-is without going through
     *            {@link #replaceWith(byte[])}; otherwise it is validated by that method.
     * @throws IllegalArgumentException
     *             if either size is not positive, if {@code averageBytesPerChar} exceeds
     *             {@code maxBytesPerChar}, or (untrusted only) if the replacement is rejected.
     */
    CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, boolean trusted) {
        if (averageBytesPerChar <= 0 || maxBytesPerChar <= 0) {
            throw new IllegalArgumentException("averageBytesPerChar and maxBytesPerChar must both be positive");
        }
        if (averageBytesPerChar > maxBytesPerChar) {
            throw new IllegalArgumentException("averageBytesPerChar is greater than maxBytesPerChar");
        }
        this.charset = cs;
        this.averageBytesPerChar = averageBytesPerChar;
        this.maxBytesPerChar = maxBytesPerChar;
        if (trusted) {
            // The RI enforces unnecessary restrictions on the replacement bytes. We trust ICU to
            // know what it's doing. Doing so lets us support ICU's EUC-JP, SCSU, and Shift_JIS.
            this.replacementBytes = replacement;
        } else {
            replaceWith(replacement);
        }
    }

    /**
     * Returns the average number of bytes created by this encoder for a single
     * input character.
     */
    public final float averageBytesPerChar() {
        return averageBytesPerChar;
    }

    /**
     * Tests whether the given character can be encoded by this encoder.
     *
     * <p>Note that this method may change the internal state of this encoder, so
     * it should not be called when another encoding process is ongoing,
     * otherwise it will throw an {@code IllegalStateException}.
     *
     * @throws IllegalStateException if another encode process is ongoing.
     */
    public boolean canEncode(char c) {
        return canEncode(CharBuffer.wrap(new char[] { c }));
    }

    /**
     * Tests whether the given {@code CharSequence} can be encoded by this
     * encoder.
     *
     * <p>The sequence is encoded with both error actions temporarily set to
     * {@link CodingErrorAction#REPORT REPORT}; the previous actions are restored and the
     * encoder is reset before this method returns. A {@code CharBuffer} argument is
     * duplicated first, so its position is left untouched.
     *
     * <p>Note that this method may change the internal state of this encoder, so
     * it should not be called when another encode process is ongoing, otherwise
     * it will throw an {@code IllegalStateException}.
     *
     * @throws IllegalStateException if another encode process is ongoing.
     */
    public boolean canEncode(CharSequence sequence) {
        CharBuffer cb;
        if (sequence instanceof CharBuffer) {
            cb = ((CharBuffer) sequence).duplicate();
        } else {
            cb = CharBuffer.wrap(sequence);
        }

        // A completed operation is implicitly reset; an unfinished one is an error.
        if (state == State.FLUSHED) {
            reset();
        }
        if (state != State.RESET) {
            throw illegalStateException();
        }

        CodingErrorAction originalMalformedInputAction = malformedInputAction;
        CodingErrorAction originalUnmappableCharacterAction = unmappableCharacterAction;
        onMalformedInput(CodingErrorAction.REPORT);
        onUnmappableCharacter(CodingErrorAction.REPORT);
        try {
            encode(cb);
            return true;
        } catch (CharacterCodingException e) {
            return false;
        } finally {
            onMalformedInput(originalMalformedInputAction);
            onUnmappableCharacter(originalUnmappableCharacterAction);
            reset();
        }
    }

    /**
     * Returns the {@link Charset} which this encoder uses.
     */
    public final Charset charset() {
        return charset;
    }

    /**
     * This is a facade method for the encoding operation.
     * <p>
     * This method encodes the remaining character sequence of the given
     * character buffer into a new byte buffer. This method performs a complete
     * encoding operation, resets at first, then encodes, and flushes at last.
     * The output buffer is initially sized from {@link #averageBytesPerChar()} and
     * grown whenever encoding or flushing reports an overflow.
     * <p>
     * This method should not be invoked if another encode operation is ongoing.
     *
     * @param in
     *            the input buffer.
     * @return a new {@code ByteBuffer} containing the bytes produced by
     *         this encoding operation. The buffer's position will be zero and
     *         its limit will be the number of bytes produced.
     * @throws MalformedInputException
     *             if an illegal input character sequence for this charset is
     *             encountered, and the action for malformed error is
     *             {@link CodingErrorAction#REPORT CodingErrorAction.REPORT}
     * @throws UnmappableCharacterException
     *             if a legal but unmappable input character sequence for this
     *             charset is encountered, and the action for unmappable
     *             character error is
     *             {@link CodingErrorAction#REPORT CodingErrorAction.REPORT}.
     *             Unmappable means the Unicode character sequence at the input
     *             buffer's current position cannot be mapped to an equivalent
     *             byte sequence.
     * @throws CharacterCodingException
     *             if other exception happened during the encode operation.
     */
    public final ByteBuffer encode(CharBuffer in) throws CharacterCodingException {
        int length = (int) (in.remaining() * averageBytesPerChar);
        ByteBuffer out = ByteBuffer.allocate(length);

        reset();

        while (state != State.FLUSHED) {
            CoderResult result = encode(in, out, true);
            if (result == CoderResult.OVERFLOW) {
                out = allocateMore(out);
                continue; // No point trying to flush to an already-full buffer.
            } else {
                checkCoderResult(result);
            }

            result = flush(out);
            if (result == CoderResult.OVERFLOW) {
                out = allocateMore(out);
            } else {
                checkCoderResult(result);
            }
        }

        out.flip();
        return out;
    }

    /**
     * Converts a reported error result into the matching exception. Results that are not
     * errors, or whose action is not {@code REPORT}, are ignored.
     */
    private void checkCoderResult(CoderResult result) throws CharacterCodingException {
        if (malformedInputAction == CodingErrorAction.REPORT && result.isMalformed()) {
            throw new MalformedInputException(result.length());
        } else if (unmappableCharacterAction == CodingErrorAction.REPORT && result.isUnmappable()) {
            throw new UnmappableCharacterException(result.length());
        }
    }

    /**
     * Returns a buffer with twice the capacity of {@code output}, holding the bytes written
     * to {@code output} so far and positioned after them. A zero-capacity buffer is replaced
     * by a one-byte buffer, because doubling zero would never make progress.
     */
    private ByteBuffer allocateMore(ByteBuffer output) {
        if (output.capacity() == 0) {
            return ByteBuffer.allocate(1);
        }
        ByteBuffer result = ByteBuffer.allocate(output.capacity() * 2);
        output.flip();
        result.put(output);
        return result;
    }

    /**
     * Encodes characters starting at the current position of the given input
     * buffer, and writes the equivalent byte sequence into the given output
     * buffer from its current position.
     * <p>
     * The buffers' position will be changed with the reading and writing
     * operation, but their limits and marks will be kept intact.
     * <p>
     * A {@code CoderResult} instance will be returned according to
     * following rules:
     * <ul>
     * <li>A {@link CoderResult#malformedForLength(int) malformed input} result
     * indicates that some malformed input error was encountered, and the
     * erroneous characters start at the input buffer's position and their
     * number can be got by result's {@link CoderResult#length() length}. This
     * kind of result can be returned only if the malformed action is
     * {@link CodingErrorAction#REPORT CodingErrorAction.REPORT}.</li>
     * <li>{@link CoderResult#UNDERFLOW CoderResult.UNDERFLOW} indicates that
     * as many characters as possible in the input buffer have been encoded. If
     * there is no further input and no characters left in the input buffer then
     * this task is complete. If this is not the case then the client should
     * call this method again supplying some more input characters.</li>
     * <li>{@link CoderResult#OVERFLOW CoderResult.OVERFLOW} indicates that the
     * output buffer has been filled, while there are still some characters
     * remaining in the input buffer. This method should be invoked again with a
     * non-full output buffer.</li>
     * <li>A {@link CoderResult#unmappableForLength(int) unmappable character}
     * result indicates that some unmappable character error was encountered,
     * and the erroneous characters start at the input buffer's position and
     * their number can be got by result's {@link CoderResult#length() length}.
     * This kind of result can be returned only on
     * {@link CodingErrorAction#REPORT CodingErrorAction.REPORT}.</li>
     * </ul>
     * <p>
     * The {@code endOfInput} parameter indicates if the invoker can
     * provide further input. This parameter is true if and only if the
     * characters in the current input buffer are all inputs for this encoding
     * operation. Note that it is common and won't cause an error if the invoker
     * sets false and then has no more input available, while it may cause an
     * error if the invoker always sets true in several consecutive invocations.
     * This would make the remaining input to be treated as malformed input.
     * <p>
     * This method invokes the
     * {@link #encodeLoop(CharBuffer, ByteBuffer) encodeLoop} method to
     * implement the basic encode logic for a specific charset.
     *
     * @param in
     *            the input buffer.
     * @param out
     *            the output buffer.
     * @param endOfInput
     *            true if all the input characters have been provided.
     * @return a {@code CoderResult} instance indicating the result.
     * @throws IllegalStateException
     *             if this encoder has been flushed and not reset, or if
     *             {@code endOfInput} is false after a previous invocation
     *             passed true.
     * @throws CoderMalfunctionError
     *             If the {@link #encodeLoop(CharBuffer, ByteBuffer) encodeLoop}
     *             method threw a {@code BufferUnderflowException} or
     *             {@code BufferOverflowException}.
     */
    public final CoderResult encode(CharBuffer in, ByteBuffer out, boolean endOfInput) {
        if (state != State.RESET && state != State.ONGOING
                && !(endOfInput && state == State.END_OF_INPUT)) {
            throw illegalStateException();
        }

        state = endOfInput ? State.END_OF_INPUT : State.ONGOING;

        while (true) {
            CoderResult result;
            try {
                result = encodeLoop(in, out);
            } catch (BufferOverflowException ex) {
                throw new CoderMalfunctionError(ex);
            } catch (BufferUnderflowException ex) {
                throw new CoderMalfunctionError(ex);
            }

            if (result == CoderResult.UNDERFLOW) {
                if (endOfInput && in.hasRemaining()) {
                    // Input the charset could not consume at end of input is malformed.
                    result = CoderResult.malformedForLength(in.remaining());
                } else {
                    return result;
                }
            } else if (result == CoderResult.OVERFLOW) {
                return result;
            }

            // We have a real error, so do what the appropriate action tells us what to do...
            CodingErrorAction action =
                    result.isUnmappable() ? unmappableCharacterAction : malformedInputAction;
            if (action == CodingErrorAction.REPORT) {
                return result;
            } else if (action == CodingErrorAction.REPLACE) {
                if (out.remaining() < replacementBytes.length) {
                    return CoderResult.OVERFLOW;
                }
                out.put(replacementBytes);
            }
            // IGNORE and REPLACE both skip the offending input and keep encoding.
            in.position(in.position() + result.length());
        }
    }

    /**
     * Encodes characters into bytes. This method is called by
     * {@link #encode(CharBuffer, ByteBuffer, boolean) encode}.
     * <p>
     * This method will implement the essential encoding operation, and it won't
     * stop encoding until either all the input characters are read, the output
     * buffer is filled, or some exception is encountered. Then it will
     * return a {@code CoderResult} object indicating the result of the
     * current encoding operation. The rule to construct the
     * {@code CoderResult} is the same as for
     * {@link #encode(CharBuffer, ByteBuffer, boolean) encode}. When an
     * exception is encountered in the encoding operation, most implementations
     * of this method will return a relevant result object to the
     * {@link #encode(CharBuffer, ByteBuffer, boolean) encode} method, and
     * subclasses may handle the exception and
     * implement the error action themselves.
     * <p>
     * The buffers are scanned from their current positions, and their positions
     * will be modified accordingly, while their marks and limits will be
     * intact. At most {@link CharBuffer#remaining() in.remaining()} characters
     * will be read, and {@link ByteBuffer#remaining() out.remaining()} bytes
     * will be written.
     * <p>
     * Note that some implementations may pre-scan the input buffer and return
     * {@code CoderResult.UNDERFLOW} until it receives sufficient input.
     *
     * @param in
     *            the input buffer.
     * @param out
     *            the output buffer.
     * @return a {@code CoderResult} instance indicating the result.
     */
    protected abstract CoderResult encodeLoop(CharBuffer in, ByteBuffer out);

    /**
     * Flushes this encoder.
     * <p>
     * This method will call {@link #implFlush(ByteBuffer) implFlush}. Some
     * encoders may need to write some bytes to the output buffer when they have
     * read all input characters; subclasses can override
     * {@link #implFlush(ByteBuffer) implFlush} to perform the writing action.
     * <p>
     * The maximum number of written bytes won't be larger than
     * {@link ByteBuffer#remaining() out.remaining()}. If some encoder wants to
     * write more bytes than the output buffer's available remaining space, then
     * {@code CoderResult.OVERFLOW} will be returned, and this method
     * must be called again with a byte buffer that has free space. Otherwise
     * this method will return {@code CoderResult.UNDERFLOW}, which
     * means one encoding process has been completed successfully.
     * <p>
     * During the flush, the output buffer's position will be changed
     * accordingly, while its mark and limit will be intact.
     *
     * @param out
     *            the given output buffer.
     * @return {@code CoderResult.UNDERFLOW} or
     *         {@code CoderResult.OVERFLOW}.
     * @throws IllegalStateException
     *             if this encoder isn't already flushed or at end of input.
     */
    public final CoderResult flush(ByteBuffer out) {
        if (state != State.FLUSHED && state != State.END_OF_INPUT) {
            throw illegalStateException();
        }
        CoderResult result = implFlush(out);
        if (result == CoderResult.UNDERFLOW) {
            state = State.FLUSHED;
        }
        return result;
    }

    /**
     * Flushes this encoder. The default implementation does nothing and always
     * returns {@code CoderResult.UNDERFLOW}; this method can be
     * overridden if needed.
     *
     * @param out
     *            the output buffer.
     * @return {@code CoderResult.UNDERFLOW} or
     *         {@code CoderResult.OVERFLOW}.
     */
    protected CoderResult implFlush(ByteBuffer out) {
        return CoderResult.UNDERFLOW;
    }

    /**
     * Notifies that this encoder's {@code CodingErrorAction} specified
     * for malformed input error has been changed. The default implementation
     * does nothing; this method can be overridden if needed.
     *
     * @param newAction
     *            the new action.
     */
    protected void implOnMalformedInput(CodingErrorAction newAction) {
        // default implementation is empty
    }

    /**
     * Notifies that this encoder's {@code CodingErrorAction} specified
     * for unmappable character error has been changed. The default
     * implementation does nothing; this method can be overridden if needed.
     *
     * @param newAction
     *            the new action.
     */
    protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
        // default implementation is empty
    }

    /**
     * Notifies that this encoder's replacement has been changed. The default
     * implementation does nothing; this method can be overridden if needed.
     *
     * @param newReplacement
     *            the new replacement byte array.
     */
    protected void implReplaceWith(byte[] newReplacement) {
        // default implementation is empty
    }

    /**
     * Resets this encoder's charset related state. The default implementation
     * does nothing; this method can be overridden if needed.
     */
    protected void implReset() {
        // default implementation is empty
    }

    /**
     * Tests whether the given argument is legal as this encoder's replacement byte
     * array. The given byte array is legal if and only if it can be decoded into
     * characters, that is, decoding it with this charset's decoder (reporting both
     * malformed input and unmappable characters) does not produce an error result.
     *
     * @param replacement
     *            the candidate replacement bytes.
     * @return true if decoding {@code replacement} reported no error.
     */
    public boolean isLegalReplacement(byte[] replacement) {
        if (decoder == null) {
            decoder = charset.newDecoder();
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
        // The decoder is cached across calls; start every check from a clean state so that
        // a previous check cannot influence this one.
        decoder.reset();
        ByteBuffer in = ByteBuffer.wrap(replacement);
        CharBuffer out = CharBuffer.allocate((int) (replacement.length * decoder.maxCharsPerByte()));
        CoderResult result = decoder.decode(in, out, true);
        return !result.isError();
    }

    /**
     * Returns this encoder's {@code CodingErrorAction} when a malformed
     * input error occurred during the encoding process.
     */
    public CodingErrorAction malformedInputAction() {
        return malformedInputAction;
    }

    /**
     * Returns the maximum number of bytes which can be created by this encoder for
     * one input character. The value is always positive.
     */
    public final float maxBytesPerChar() {
        return maxBytesPerChar;
    }

    /**
     * Sets this encoder's action on malformed input error.
     *
     * This method will call the
     * {@link #implOnMalformedInput(CodingErrorAction) implOnMalformedInput}
     * method with the given new action as argument.
     *
     * @param newAction
     *            the new action on malformed input error.
     * @return this encoder.
     * @throws IllegalArgumentException
     *             if the given newAction is null.
     */
    public final CharsetEncoder onMalformedInput(CodingErrorAction newAction) {
        if (newAction == null) {
            throw new IllegalArgumentException("newAction == null");
        }
        malformedInputAction = newAction;
        implOnMalformedInput(newAction);
        return this;
    }

    /**
     * Sets this encoder's action on unmappable character error.
     *
     * This method will call the
     * {@link #implOnUnmappableCharacter(CodingErrorAction) implOnUnmappableCharacter}
     * method with the given new action as argument.
     *
     * @param newAction
     *            the new action on unmappable character error.
     * @return this encoder.
     * @throws IllegalArgumentException
     *             if the given newAction is null.
     */
    public final CharsetEncoder onUnmappableCharacter(CodingErrorAction newAction) {
        if (newAction == null) {
            throw new IllegalArgumentException("newAction == null");
        }
        unmappableCharacterAction = newAction;
        implOnUnmappableCharacter(newAction);
        return this;
    }

    /**
     * Returns the replacement byte array. The encoder's own array is returned without
     * copying. Values installed through {@link #replaceWith(byte[])} are never null or empty.
     */
    public final byte[] replacement() {
        return replacementBytes;
    }

    /**
     * Sets the new replacement value.
     *
     * This method first checks the given replacement's validity, then changes
     * the replacement value and finally calls the
     * {@link #implReplaceWith(byte[]) implReplaceWith} method with the given
     * new replacement as argument. The array is stored without being copied.
     *
     * @param replacement
     *            the replacement byte array, cannot be null or empty, its
     *            length cannot be larger than {@code maxBytesPerChar},
     *            and it must be legal replacement, which can be justified by
     *            calling {@code isLegalReplacement(byte[] replacement)}.
     * @return this encoder.
     * @throws IllegalArgumentException
     *             if the given replacement cannot satisfy the requirement
     *             mentioned above.
     */
    public final CharsetEncoder replaceWith(byte[] replacement) {
        if (replacement == null) {
            throw new IllegalArgumentException("replacement == null");
        }
        if (replacement.length == 0) {
            throw new IllegalArgumentException("replacement.length == 0");
        }
        if (replacement.length > maxBytesPerChar()) {
            throw new IllegalArgumentException("replacement.length > maxBytesPerChar: " +
                    replacement.length + " > " + maxBytesPerChar());
        }
        if (!isLegalReplacement(replacement)) {
            throw new IllegalArgumentException("Bad replacement: " + Arrays.toString(replacement));
        }
        // It seems like a bug, but the RI doesn't clone, and we have tests that check we don't.
        this.replacementBytes = replacement;
        implReplaceWith(replacementBytes);
        return this;
    }

    /**
     * Resets this encoder. This method will reset the internal state and then
     * calls {@link #implReset} to reset any state related to the
     * specific charset.
     *
     * @return this encoder.
     */
    public final CharsetEncoder reset() {
        state = State.RESET;
        implReset();
        return this;
    }

    /**
     * Returns this encoder's {@code CodingErrorAction} when unmappable
     * character occurred during encoding process.
     */
    public CodingErrorAction unmappableCharacterAction() {
        return unmappableCharacterAction;
    }

    /**
     * Builds (but does not throw) the exception describing an operation that is invalid in
     * the current state; callers are expected to {@code throw} the returned value.
     */
    private IllegalStateException illegalStateException() {
        return new IllegalStateException("State: " + state);
    }
}
673