1/* ====================================================================
2 * Copyright (c) 2006 J.T. Beetstra
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * ====================================================================
23 */
24
25package com.beetstra.jutf7;
26
27import java.nio.charset.Charset;
28import java.nio.charset.CharsetDecoder;
29import java.nio.charset.CharsetEncoder;
30import java.util.Arrays;
31import java.util.List;
32
33/**
34 * <p>
35 * Abstract base class for UTF-7 style encoding and decoding.
36 * </p>
37 *
38 * @author Jaap Beetstra
39 */
40abstract class UTF7StyleCharset extends Charset {
41    private static final List CONTAINED = Arrays.asList(new String[] {
42            "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16", "UTF-16LE", "UTF-16BE"
43    });
44    final boolean strict;
45    Base64Util base64;
46
47    /**
48     * <p>
49     * Besides the name and aliases, two additional parameters are required.
50     * First the base 64 alphabet used; in modified UTF-7 a slightly different
51     * alphabet is used. Additionally, it should be specified if encoders and
52     * decoders should be strict about the interpretation of malformed encoded
53     * sequences. This is used since modified UTF-7 specifically disallows some
54     * constructs which are allowed (or not specifically disallowed) in UTF-7
55     * (RFC 2152).
56     * </p>
57     *
58     * @param canonicalName The name as defined in java.nio.charset.Charset
59     * @param aliases The aliases as defined in java.nio.charset.Charset
60     * @param alphabet The base 64 alphabet used
61     * @param strict True if strict handling of sequences is requested
62     */
63    protected UTF7StyleCharset(String canonicalName, String[] aliases, String alphabet,
64            boolean strict) {
65        super(canonicalName, aliases);
66        this.base64 = new Base64Util(alphabet);
67        this.strict = strict;
68    }
69
70    /*
71     * (non-Javadoc)
72     * @see java.nio.charset.Charset#contains(java.nio.charset.Charset)
73     */
74    public boolean contains(final Charset cs) {
75        return CONTAINED.contains(cs.name());
76    }
77
78    /*
79     * (non-Javadoc)
80     * @see java.nio.charset.Charset#newDecoder()
81     */
82    public CharsetDecoder newDecoder() {
83        return new UTF7StyleCharsetDecoder(this, base64, strict);
84    }
85
86    /*
87     * (non-Javadoc)
88     * @see java.nio.charset.Charset#newEncoder()
89     */
90    public CharsetEncoder newEncoder() {
91        return new UTF7StyleCharsetEncoder(this, base64, strict);
92    }
93
94    /**
95     * Tells if a character can be encoded using simple (US-ASCII) encoding or
96     * requires base 64 encoding.
97     *
98     * @param ch The character
99     * @return True if the character can be encoded directly, false otherwise
100     */
101    abstract boolean canEncodeDirectly(char ch);
102
103    /**
104     * Returns character used to switch to base 64 encoding.
105     *
106     * @return The shift character
107     */
108    abstract byte shift();
109
110    /**
111     * Returns character used to switch from base 64 encoding to simple
112     * encoding.
113     *
114     * @return The unshift character
115     */
116    abstract byte unshift();
117}
118