1/*
2 * Copyright 2001-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.commons.codec.binary;
18
19import org.apache.commons.codec.BinaryDecoder;
20import org.apache.commons.codec.BinaryEncoder;
21import org.apache.commons.codec.DecoderException;
22import org.apache.commons.codec.EncoderException;
23
24/**
25 * Provides Base64 encoding and decoding as defined by RFC 2045.
26 *
27 * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite>
28 * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One:
29 * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p>
30 *
31 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
32 * @author Apache Software Foundation
33 * @since 1.0-dev
34 * @version $Id: Base64.java,v 1.20 2004/05/24 00:21:24 ggregory Exp $
35 *
36 * @deprecated Please use {@link java.net.URL#openConnection} instead.
37 *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
38 *     for further details.
39 */
40@Deprecated
41public class Base64 implements BinaryEncoder, BinaryDecoder {
42
43    /**
44     * Chunk size per RFC 2045 section 6.8.
45     *
46     * <p>The {@value} character limit does not count the trailing CRLF, but counts
47     * all other characters, including any equal signs.</p>
48     *
49     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
50     */
51    static final int CHUNK_SIZE = 76;
52
53    /**
54     * Chunk separator per RFC 2045 section 2.1.
55     *
56     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
57     */
58    static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes();
59
60    /**
61     * The base length.
62     */
63    static final int BASELENGTH = 255;
64
65    /**
66     * Lookup length.
67     */
68    static final int LOOKUPLENGTH = 64;
69
70    /**
71     * Used to calculate the number of bits in a byte.
72     */
73    static final int EIGHTBIT = 8;
74
75    /**
76     * Used when encoding something which has fewer than 24 bits.
77     */
78    static final int SIXTEENBIT = 16;
79
80    /**
81     * Used to determine how many bits data contains.
82     */
83    static final int TWENTYFOURBITGROUP = 24;
84
85    /**
86     * Used to get the number of Quadruples.
87     */
88    static final int FOURBYTE = 4;
89
90    /**
91     * Used to test the sign of a byte.
92     */
93    static final int SIGN = -128;
94
95    /**
96     * Byte used to pad output.
97     */
98    static final byte PAD = (byte) '=';
99
100    // Create arrays to hold the base64 characters and a
101    // lookup for base64 chars
102    private static byte[] base64Alphabet = new byte[BASELENGTH];
103    private static byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
104
105    // Populating the lookup and character arrays
106    static {
107        for (int i = 0; i < BASELENGTH; i++) {
108            base64Alphabet[i] = (byte) -1;
109        }
110        for (int i = 'Z'; i >= 'A'; i--) {
111            base64Alphabet[i] = (byte) (i - 'A');
112        }
113        for (int i = 'z'; i >= 'a'; i--) {
114            base64Alphabet[i] = (byte) (i - 'a' + 26);
115        }
116        for (int i = '9'; i >= '0'; i--) {
117            base64Alphabet[i] = (byte) (i - '0' + 52);
118        }
119
120        base64Alphabet['+'] = 62;
121        base64Alphabet['/'] = 63;
122
123        for (int i = 0; i <= 25; i++) {
124            lookUpBase64Alphabet[i] = (byte) ('A' + i);
125        }
126
127        for (int i = 26, j = 0; i <= 51; i++, j++) {
128            lookUpBase64Alphabet[i] = (byte) ('a' + j);
129        }
130
131        for (int i = 52, j = 0; i <= 61; i++, j++) {
132            lookUpBase64Alphabet[i] = (byte) ('0' + j);
133        }
134
135        lookUpBase64Alphabet[62] = (byte) '+';
136        lookUpBase64Alphabet[63] = (byte) '/';
137    }
138
139    private static boolean isBase64(byte octect) {
140        if (octect == PAD) {
141            return true;
142        } else if (base64Alphabet[octect] == -1) {
143            return false;
144        } else {
145            return true;
146        }
147    }
148
149    /**
150     * Tests a given byte array to see if it contains
151     * only valid characters within the Base64 alphabet.
152     *
153     * @param arrayOctect byte array to test
154     * @return true if all bytes are valid characters in the Base64
155     *         alphabet or if the byte array is empty; false, otherwise
156     */
157    public static boolean isArrayByteBase64(byte[] arrayOctect) {
158
159        arrayOctect = discardWhitespace(arrayOctect);
160
161        int length = arrayOctect.length;
162        if (length == 0) {
163            // shouldn't a 0 length array be valid base64 data?
164            // return false;
165            return true;
166        }
167        for (int i = 0; i < length; i++) {
168            if (!isBase64(arrayOctect[i])) {
169                return false;
170            }
171        }
172        return true;
173    }
174
175    /**
176     * Encodes binary data using the base64 algorithm but
177     * does not chunk the output.
178     *
179     * @param binaryData binary data to encode
180     * @return Base64 characters
181     */
182    public static byte[] encodeBase64(byte[] binaryData) {
183        return encodeBase64(binaryData, false);
184    }
185
186    /**
187     * Encodes binary data using the base64 algorithm and chunks
188     * the encoded output into 76 character blocks
189     *
190     * @param binaryData binary data to encode
191     * @return Base64 characters chunked in 76 character blocks
192     */
193    public static byte[] encodeBase64Chunked(byte[] binaryData) {
194        return encodeBase64(binaryData, true);
195    }
196
197
198    /**
199     * Decodes an Object using the base64 algorithm.  This method
200     * is provided in order to satisfy the requirements of the
201     * Decoder interface, and will throw a DecoderException if the
202     * supplied object is not of type byte[].
203     *
204     * @param pObject Object to decode
205     * @return An object (of type byte[]) containing the
206     *         binary data which corresponds to the byte[] supplied.
207     * @throws DecoderException if the parameter supplied is not
208     *                          of type byte[]
209     */
210    public Object decode(Object pObject) throws DecoderException {
211        if (!(pObject instanceof byte[])) {
212            throw new DecoderException("Parameter supplied to Base64 decode is not a byte[]");
213        }
214        return decode((byte[]) pObject);
215    }
216
217    /**
218     * Decodes a byte[] containing containing
219     * characters in the Base64 alphabet.
220     *
221     * @param pArray A byte array containing Base64 character data
222     * @return a byte array containing binary data
223     */
224    public byte[] decode(byte[] pArray) {
225        return decodeBase64(pArray);
226    }
227
228    /**
229     * Encodes binary data using the base64 algorithm, optionally
230     * chunking the output into 76 character blocks.
231     *
232     * @param binaryData Array containing binary data to encode.
233     * @param isChunked if isChunked is true this encoder will chunk
234     *                  the base64 output into 76 character blocks
235     * @return Base64-encoded data.
236     */
237    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
238        int lengthDataBits = binaryData.length * EIGHTBIT;
239        int fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
240        int numberTriplets = lengthDataBits / TWENTYFOURBITGROUP;
241        byte encodedData[] = null;
242        int encodedDataLength = 0;
243        int nbrChunks = 0;
244
245        if (fewerThan24bits != 0) {
246            //data not divisible by 24 bit
247            encodedDataLength = (numberTriplets + 1) * 4;
248        } else {
249            // 16 or 8 bit
250            encodedDataLength = numberTriplets * 4;
251        }
252
253        // If the output is to be "chunked" into 76 character sections,
254        // for compliance with RFC 2045 MIME, then it is important to
255        // allow for extra length to account for the separator(s)
256        if (isChunked) {
257
258            nbrChunks =
259                (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math.ceil((float) encodedDataLength / CHUNK_SIZE));
260            encodedDataLength += nbrChunks * CHUNK_SEPARATOR.length;
261        }
262
263        encodedData = new byte[encodedDataLength];
264
265        byte k = 0, l = 0, b1 = 0, b2 = 0, b3 = 0;
266
267        int encodedIndex = 0;
268        int dataIndex = 0;
269        int i = 0;
270        int nextSeparatorIndex = CHUNK_SIZE;
271        int chunksSoFar = 0;
272
273        //log.debug("number of triplets = " + numberTriplets);
274        for (i = 0; i < numberTriplets; i++) {
275            dataIndex = i * 3;
276            b1 = binaryData[dataIndex];
277            b2 = binaryData[dataIndex + 1];
278            b3 = binaryData[dataIndex + 2];
279
280            //log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
281
282            l = (byte) (b2 & 0x0f);
283            k = (byte) (b1 & 0x03);
284
285            byte val1 =
286                ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
287            byte val2 =
288                ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
289            byte val3 =
290                ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
291
292            encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
293            //log.debug( "val2 = " + val2 );
294            //log.debug( "k4   = " + (k<<4) );
295            //log.debug(  "vak  = " + (val2 | (k<<4)) );
296            encodedData[encodedIndex + 1] =
297                lookUpBase64Alphabet[val2 | (k << 4)];
298            encodedData[encodedIndex + 2] =
299                lookUpBase64Alphabet[(l << 2) | val3];
300            encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
301
302            encodedIndex += 4;
303
304            // If we are chunking, let's put a chunk separator down.
305            if (isChunked) {
306                // this assumes that CHUNK_SIZE % 4 == 0
307                if (encodedIndex == nextSeparatorIndex) {
308                    System.arraycopy(
309                        CHUNK_SEPARATOR,
310                        0,
311                        encodedData,
312                        encodedIndex,
313                        CHUNK_SEPARATOR.length);
314                    chunksSoFar++;
315                    nextSeparatorIndex =
316                        (CHUNK_SIZE * (chunksSoFar + 1)) +
317                        (chunksSoFar * CHUNK_SEPARATOR.length);
318                    encodedIndex += CHUNK_SEPARATOR.length;
319                }
320            }
321        }
322
323        // form integral number of 6-bit groups
324        dataIndex = i * 3;
325
326        if (fewerThan24bits == EIGHTBIT) {
327            b1 = binaryData[dataIndex];
328            k = (byte) (b1 & 0x03);
329            //log.debug("b1=" + b1);
330            //log.debug("b1<<2 = " + (b1>>2) );
331            byte val1 =
332                ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
333            encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
334            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
335            encodedData[encodedIndex + 2] = PAD;
336            encodedData[encodedIndex + 3] = PAD;
337        } else if (fewerThan24bits == SIXTEENBIT) {
338
339            b1 = binaryData[dataIndex];
340            b2 = binaryData[dataIndex + 1];
341            l = (byte) (b2 & 0x0f);
342            k = (byte) (b1 & 0x03);
343
344            byte val1 =
345                ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
346            byte val2 =
347                ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
348
349            encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
350            encodedData[encodedIndex + 1] =
351                lookUpBase64Alphabet[val2 | (k << 4)];
352            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
353            encodedData[encodedIndex + 3] = PAD;
354        }
355
356        if (isChunked) {
357            // we also add a separator to the end of the final chunk.
358            if (chunksSoFar < nbrChunks) {
359                System.arraycopy(
360                    CHUNK_SEPARATOR,
361                    0,
362                    encodedData,
363                    encodedDataLength - CHUNK_SEPARATOR.length,
364                    CHUNK_SEPARATOR.length);
365            }
366        }
367
368        return encodedData;
369    }
370
371    /**
372     * Decodes Base64 data into octects
373     *
374     * @param base64Data Byte array containing Base64 data
375     * @return Array containing decoded data.
376     */
377    public static byte[] decodeBase64(byte[] base64Data) {
378        // RFC 2045 requires that we discard ALL non-Base64 characters
379        base64Data = discardNonBase64(base64Data);
380
381        // handle the edge case, so we don't have to worry about it later
382        if (base64Data.length == 0) {
383            return new byte[0];
384        }
385
386        int numberQuadruple = base64Data.length / FOURBYTE;
387        byte decodedData[] = null;
388        byte b1 = 0, b2 = 0, b3 = 0, b4 = 0, marker0 = 0, marker1 = 0;
389
390        // Throw away anything not in base64Data
391
392        int encodedIndex = 0;
393        int dataIndex = 0;
394        {
395            // this sizes the output array properly - rlw
396            int lastData = base64Data.length;
397            // ignore the '=' padding
398            while (base64Data[lastData - 1] == PAD) {
399                if (--lastData == 0) {
400                    return new byte[0];
401                }
402            }
403            decodedData = new byte[lastData - numberQuadruple];
404        }
405
406        for (int i = 0; i < numberQuadruple; i++) {
407            dataIndex = i * 4;
408            marker0 = base64Data[dataIndex + 2];
409            marker1 = base64Data[dataIndex + 3];
410
411            b1 = base64Alphabet[base64Data[dataIndex]];
412            b2 = base64Alphabet[base64Data[dataIndex + 1]];
413
414            if (marker0 != PAD && marker1 != PAD) {
415                //No PAD e.g 3cQl
416                b3 = base64Alphabet[marker0];
417                b4 = base64Alphabet[marker1];
418
419                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
420                decodedData[encodedIndex + 1] =
421                    (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
422                decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
423            } else if (marker0 == PAD) {
424                //Two PAD e.g. 3c[Pad][Pad]
425                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
426            } else if (marker1 == PAD) {
427                //One PAD e.g. 3cQ[Pad]
428                b3 = base64Alphabet[marker0];
429
430                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
431                decodedData[encodedIndex + 1] =
432                    (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
433            }
434            encodedIndex += 3;
435        }
436        return decodedData;
437    }
438
439    /**
440     * Discards any whitespace from a base-64 encoded block.
441     *
442     * @param data The base-64 encoded data to discard the whitespace
443     * from.
444     * @return The data, less whitespace (see RFC 2045).
445     */
446    static byte[] discardWhitespace(byte[] data) {
447        byte groomedData[] = new byte[data.length];
448        int bytesCopied = 0;
449
450        for (int i = 0; i < data.length; i++) {
451            switch (data[i]) {
452            case (byte) ' ' :
453            case (byte) '\n' :
454            case (byte) '\r' :
455            case (byte) '\t' :
456                    break;
457            default:
458                    groomedData[bytesCopied++] = data[i];
459            }
460        }
461
462        byte packedData[] = new byte[bytesCopied];
463
464        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
465
466        return packedData;
467    }
468
469    /**
470     * Discards any characters outside of the base64 alphabet, per
471     * the requirements on page 25 of RFC 2045 - "Any characters
472     * outside of the base64 alphabet are to be ignored in base64
473     * encoded data."
474     *
475     * @param data The base-64 encoded data to groom
476     * @return The data, less non-base64 characters (see RFC 2045).
477     */
478    static byte[] discardNonBase64(byte[] data) {
479        byte groomedData[] = new byte[data.length];
480        int bytesCopied = 0;
481
482        for (int i = 0; i < data.length; i++) {
483            if (isBase64(data[i])) {
484                groomedData[bytesCopied++] = data[i];
485            }
486        }
487
488        byte packedData[] = new byte[bytesCopied];
489
490        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
491
492        return packedData;
493    }
494
495
496    // Implementation of the Encoder Interface
497
498    /**
499     * Encodes an Object using the base64 algorithm.  This method
500     * is provided in order to satisfy the requirements of the
501     * Encoder interface, and will throw an EncoderException if the
502     * supplied object is not of type byte[].
503     *
504     * @param pObject Object to encode
505     * @return An object (of type byte[]) containing the
506     *         base64 encoded data which corresponds to the byte[] supplied.
507     * @throws EncoderException if the parameter supplied is not
508     *                          of type byte[]
509     */
510    public Object encode(Object pObject) throws EncoderException {
511        if (!(pObject instanceof byte[])) {
512            throw new EncoderException(
513                "Parameter supplied to Base64 encode is not a byte[]");
514        }
515        return encode((byte[]) pObject);
516    }
517
518    /**
519     * Encodes a byte[] containing binary data, into a byte[] containing
520     * characters in the Base64 alphabet.
521     *
522     * @param pArray a byte array containing binary data
523     * @return A byte array containing only Base64 character data
524     */
525    public byte[] encode(byte[] pArray) {
526        return encodeBase64(pArray, false);
527    }
528
529}
530