1/* ====================================================================
2 * Copyright (c) 2006 J.T. Beetstra
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * ====================================================================
23 */
24
25package com.beetstra.jutf7;
26
27import java.nio.ByteBuffer;
28import java.nio.CharBuffer;
29import java.nio.charset.CharsetEncoder;
30import java.nio.charset.CoderResult;
31
32/**
33 * <p>
34 * The CharsetEncoder used to encode both variants of the UTF-7 charset and the
35 * modified-UTF-7 charset.
36 * </p>
37 * <p>
38 * <strong>Please note this class does not behave strictly according to the
39 * specification in Sun Java VMs before 1.6.</strong> This is done to get around
40 * a bug in the implementation of
41 * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)}. Unfortunately,
42 * that method cannot be overridden.
43 * </p>
44 *
 * @see <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">JDK
 *      bug 6221056</a>
47 * @author Jaap Beetstra
48 */
49class UTF7StyleCharsetEncoder extends CharsetEncoder {
50    private static final float AVG_BYTES_PER_CHAR = 1.5f;
51    private static final float MAX_BYTES_PER_CHAR = 5.0f;
52    private final UTF7StyleCharset cs;
53    private final Base64Util base64;
54    private final byte shift;
55    private final byte unshift;
56    private final boolean strict;
57    private boolean base64mode;
58    private int bitsToOutput;
59    private int sextet;
60    static boolean useUglyHackToForceCallToFlushInJava5;
61    static {
62        String version = System.getProperty("java.specification.version");
63        String vendor = System.getProperty("java.vm.vendor");
64        useUglyHackToForceCallToFlushInJava5 = "1.4".equals(version) || "1.5".equals(version);
65        useUglyHackToForceCallToFlushInJava5 &= "Sun Microsystems Inc.".equals(vendor);
66    }
67
68    UTF7StyleCharsetEncoder(UTF7StyleCharset cs, Base64Util base64, boolean strict) {
69        super(cs, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
70        this.cs = cs;
71        this.base64 = base64;
72        this.strict = strict;
73        this.shift = cs.shift();
74        this.unshift = cs.unshift();
75    }
76
77    /*
78     * (non-Javadoc)
79     * @see java.nio.charset.CharsetEncoder#implReset()
80     */
81    protected void implReset() {
82        base64mode = false;
83        sextet = 0;
84        bitsToOutput = 0;
85    }
86
87    /**
88     * {@inheritDoc}
89     * <p>
90     * Note that this method might return <code>CoderResult.OVERFLOW</code> (as
91     * is required by the specification) if insufficient space is available in
92     * the output buffer. However, calling it again on JDKs before Java 6
93     * triggers a bug in
94     * {@link java.nio.charset.CharsetEncoder#flush(ByteBuffer)} causing it to
95     * throw an IllegalStateException (the buggy method is <code>final</code>,
96     * thus cannot be overridden).
97     * </p>
98     *
99     * @see <a
100     *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6227608">
101     *      JDK bug 6227608< /a>
102     * @param out The output byte buffer
103     * @return A coder-result object describing the reason for termination
104     */
105    protected CoderResult implFlush(ByteBuffer out) {
106        if (base64mode) {
107            if (out.remaining() < 2)
108                return CoderResult.OVERFLOW;
109            if (bitsToOutput != 0)
110                out.put(base64.getChar(sextet));
111            out.put(unshift);
112        }
113        return CoderResult.UNDERFLOW;
114    }
115
116    /**
117     * {@inheritDoc}
118     * <p>
119     * Note that this method might return <code>CoderResult.OVERFLOW</code>,
120     * even though there is sufficient space available in the output buffer.
121     * This is done to force the broken implementation of
122     * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)} to call flush
123     * (the buggy method is <code>final</code>, thus cannot be overridden).
124     * </p>
125     * <p>
126     * However, String.getBytes() fails if CoderResult.OVERFLOW is returned,
127     * since this assumes it always allocates sufficient bytes (maxBytesPerChar
128     * * nr_of_chars). Thus, as an extra check, the size of the input buffer is
129     * compared against the size of the output buffer. A static variable is used
130     * to indicate if a broken java version is used.
131     * </p>
132     * <p>
133     * It is not possible to directly write the last few bytes, since more bytes
134     * might be waiting to be encoded then those available in the input buffer.
135     * </p>
136     *
137     * @see <a
138     *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">
139     *      JDK bug 6221056< /a>
140     * @param in The input character buffer
141     * @param out The output byte buffer
142     * @return A coder-result object describing the reason for termination
143     */
144    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
145        while (in.hasRemaining()) {
146            if (out.remaining() < 4)
147                return CoderResult.OVERFLOW;
148            char ch = in.get();
149            if (cs.canEncodeDirectly(ch)) {
150                unshift(out, ch);
151                out.put((byte)ch);
152            } else if (!base64mode && ch == shift) {
153                out.put(shift);
154                out.put(unshift);
155            } else
156                encodeBase64(ch, out);
157        }
158        /*
159         * <HACK type="ugly"> These lines are required to trick JDK 1.5 and
160         * earlier into flushing when using Charset.encode(String),
161         * Charset.encode(CharBuffer) or CharsetEncoder.encode(CharBuffer)
162         * Without them, the last few bytes may be missing.
163         */
164        if (base64mode && useUglyHackToForceCallToFlushInJava5
165                && out.limit() != MAX_BYTES_PER_CHAR * in.limit())
166            return CoderResult.OVERFLOW;
167        /* </HACK> */
168        return CoderResult.UNDERFLOW;
169    }
170
171    /**
172     * <p>
173     * Writes the bytes necessary to leave <i>base 64 mode</i>. This might
174     * include an unshift character.
175     * </p>
176     *
177     * @param out
178     * @param ch
179     */
180    private void unshift(ByteBuffer out, char ch) {
181        if (!base64mode)
182            return;
183        if (bitsToOutput != 0)
184            out.put(base64.getChar(sextet));
185        if (base64.contains(ch) || ch == unshift || strict)
186            out.put(unshift);
187        base64mode = false;
188        sextet = 0;
189        bitsToOutput = 0;
190    }
191
192    /**
193     * <p>
194     * Writes the bytes necessary to encode a character in <i>base 64 mode</i>.
195     * All bytes which are fully determined will be written. The fields
196     * <code>bitsToOutput</code> and <code>sextet</code> are used to remember
197     * the bytes not yet fully determined.
198     * </p>
199     *
200     * @param out
201     * @param ch
202     */
203    private void encodeBase64(char ch, ByteBuffer out) {
204        if (!base64mode)
205            out.put(shift);
206        base64mode = true;
207        bitsToOutput += 16;
208        while (bitsToOutput >= 6) {
209            bitsToOutput -= 6;
210            sextet += (ch >> bitsToOutput);
211            sextet &= 0x3F;
212            out.put(base64.getChar(sextet));
213            sextet = 0;
214        }
215        sextet = (ch << (6 - bitsToOutput)) & 0x3F;
216    }
217}
218