1/* ====================================================================
2 * Copyright (c) 2006 J.T. Beetstra
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * ====================================================================
23 */
24
25package com.beetstra.jutf7;
26
27import java.util.Arrays;
28
29/**
30 * <p>
31 * Represent a base 64 mapping. The 64 characters used in the encoding can be
32 * specified, since modified-UTF-7 uses other characters than UTF-7 (',' instead
33 * of '/').
34 * </p>
35 * <p>
36 * The exact type of the arguments and result values is adapted to the needs of
37 * the encoder and decoder, as opposed to following a strict interpretation of
38 * base 64.
39 * </p>
40 * <p>
41 * Base 64, as specified in RFC 2045, is an encoding used to encode bytes as
42 * characters. In (modified-)UTF-7 however, it is used to encode characters as
43 * bytes, using some intermediate steps:
44 * </p>
45 * <ol>
46 * <li>Encode all characters as a 16-bit (UTF-16) integer value</li>
47 * <li>Write this as stream of bytes (most-significant first)</li>
48 * <li>Encode these bytes using (modified) base 64 encoding</li>
49 * <li>Write the thus formed stream of characters as a stream of bytes, using
50 * ASCII encoding</li>
51 * </ol>
52 *
53 * @author Jaap Beetstra
54 */
class Base64Util {
    /** Number of characters in a base 64 alphabet. */
    private static final int ALPHABET_LENGTH = 64;
    /** Number of 7-bit ASCII code points; also the size of the reverse lookup table. */
    private static final int ASCII_RANGE = 128;

    /** Maps a sextet value (0-63) to its character. */
    private final char[] alphabet;
    /** Maps an ASCII code (0-127) to its sextet value, or -1 if not in the alphabet. */
    private final int[] inverseAlphabet;

    /**
     * Initializes the class with the specified encoding/decoding alphabet.
     *
     * @param alphabet the 64 characters used for the sextet values 0 to 63, in
     *            that order
     * @throws IllegalArgumentException if alphabet is not 64 characters long or
     *             contains characters which are not 7-bit ASCII
     * @throws NullPointerException if alphabet is <code>null</code>
     */
    Base64Util(final String alphabet) {
        if (alphabet.length() != ALPHABET_LENGTH) {
            throw new IllegalArgumentException("alphabet has incorrect length (should be 64, not "
                    + alphabet.length() + ")");
        }
        this.alphabet = alphabet.toCharArray();
        inverseAlphabet = new int[ASCII_RANGE];
        // -1 marks "not part of the alphabet" for every ASCII code not set below
        Arrays.fill(inverseAlphabet, -1);
        for (int i = 0; i < this.alphabet.length; i++) {
            final char ch = this.alphabet[i];
            if (ch >= ASCII_RANGE) {
                throw new IllegalArgumentException("invalid character in alphabet: " + ch);
            }
            inverseAlphabet[ch] = i;
        }
    }

    /**
     * Returns the integer value of the six bits represented by the specified
     * character.
     *
     * @param ch The character, as an ASCII encoded byte
     * @return The six bits, as an integer value, or -1 if the byte is not in
     *         the alphabet
     */
    int getSextet(final byte ch) {
        // A Java byte is signed (-128..127): values with the high bit set are
        // negative, are never 7-bit ASCII, and must not be used as an index.
        if (ch < 0) {
            return -1;
        }
        return inverseAlphabet[ch];
    }

    /**
     * Tells whether the alphabet contains the specified character.
     *
     * @param ch The character
     * @return true if the alphabet contains <code>ch</code>, false otherwise
     */
    boolean contains(final char ch) {
        // char is unsigned, so only the upper bound needs checking
        if (ch >= ASCII_RANGE) {
            return false;
        }
        return inverseAlphabet[ch] >= 0;
    }

    /**
     * Encodes the six bit group as a character.
     *
     * @param sextet The six bit group to be encoded (0 to 63)
     * @return The ASCII value of the character
     * @throws ArrayIndexOutOfBoundsException if sextet is not in the range 0
     *             to 63
     */
    byte getChar(final int sextet) {
        return (byte) alphabet[sextet];
    }
}
118