base64.c revision bd33537fde8e1c68fcadfd6adf77b295ada9b45f
1/*	$NetBSD: base64.c,v 1.8 2002/11/11 01:15:17 thorpej Exp $	*/
2
3/*
4 * Copyright (c) 1996 by Internet Software Consortium.
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
11 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
12 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
13 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
16 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
17 * SOFTWARE.
18 */
19
20/*
21 * Portions Copyright (c) 1995 by International Business Machines, Inc.
22 *
23 * International Business Machines, Inc. (hereinafter called IBM) grants
24 * permission under its copyrights to use, copy, modify, and distribute this
25 * Software with or without fee, provided that the above copyright notice and
26 * all paragraphs of this notice appear in all copies, and that the name of IBM
27 * not be used in connection with the marketing of any product incorporating
28 * the Software or modifications thereof, without specific, written prior
29 * permission.
30 *
31 * To the extent it has a right to do so, IBM grants an immunity from suit
32 * under its patents, if any, for the use, sale or manufacture of products to
33 * the extent that such products are used for performing Domain Name System
34 * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
35 * granted for any product per se or for any other function of any product.
36 *
37 * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
38 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
40 * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
41 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
42 * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
43 */
44
45#include <sys/cdefs.h>
46#if defined(LIBC_SCCS) && !defined(lint)
47__RCSID("$NetBSD: base64.c,v 1.8 2002/11/11 01:15:17 thorpej Exp $");
48#endif /* LIBC_SCCS and not lint */
49
50#include <sys/types.h>
51#include <sys/param.h>
52#include <sys/socket.h>
53#include <netinet/in.h>
54#include <arpa/inet.h>
55#include "arpa_nameser.h"
56
57#include <assert.h>
58#include <ctype.h>
59#ifdef ANDROID_CHANGES
60#include "resolv_private.h"
61#else
62#include <resolv.h>
63#endif
64#include <stdio.h>
65
66#include <stdlib.h>
67#include <string.h>
68
/*
 * The base64 alphabet: the character at index v encodes the 6-bit value v
 * (upper-case letters, then lower-case letters, then digits, '+' and '/').
 */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Padding character that completes a short final group of encoded output. */
static const char Pad64 = '=';
72
73/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
74   The following encoding technique is taken from RFC 1521 by Borenstein
75   and Freed.  It is reproduced here in a slightly edited form for
76   convenience.
77
78   A 65-character subset of US-ASCII is used, enabling 6 bits to be
79   represented per printable character. (The extra 65th character, "=",
80   is used to signify a special processing function.)
81
82   The encoding process represents 24-bit groups of input bits as output
83   strings of 4 encoded characters. Proceeding from left to right, a
84   24-bit input group is formed by concatenating 3 8-bit input groups.
85   These 24 bits are then treated as 4 concatenated 6-bit groups, each
86   of which is translated into a single digit in the base64 alphabet.
87
88   Each 6-bit group is used as an index into an array of 64 printable
89   characters. The character referenced by the index is placed in the
90   output string.
91
92                         Table 1: The Base64 Alphabet
93
94      Value Encoding  Value Encoding  Value Encoding  Value Encoding
95          0 A            17 R            34 i            51 z
96          1 B            18 S            35 j            52 0
97          2 C            19 T            36 k            53 1
98          3 D            20 U            37 l            54 2
99          4 E            21 V            38 m            55 3
100          5 F            22 W            39 n            56 4
101          6 G            23 X            40 o            57 5
102          7 H            24 Y            41 p            58 6
103          8 I            25 Z            42 q            59 7
104          9 J            26 a            43 r            60 8
105         10 K            27 b            44 s            61 9
106         11 L            28 c            45 t            62 +
107         12 M            29 d            46 u            63 /
108         13 N            30 e            47 v
109         14 O            31 f            48 w         (pad) =
110         15 P            32 g            49 x
111         16 Q            33 h            50 y
112
113   Special processing is performed if fewer than 24 bits are available
114   at the end of the data being encoded.  A full encoding quantum is
115   always completed at the end of a quantity.  When fewer than 24 input
116   bits are available in an input group, zero bits are added (on the
117   right) to form an integral number of 6-bit groups.  Padding at the
118   end of the data is performed using the '=' character.
119
120   Since all base64 input is an integral number of octets, only the
121         -------------------------------------------------
122   following cases can arise:
123
124       (1) the final quantum of encoding input is an integral
125           multiple of 24 bits; here, the final unit of encoded
126	   output will be an integral multiple of 4 characters
127	   with no "=" padding,
128       (2) the final quantum of encoding input is exactly 8 bits;
129           here, the final unit of encoded output will be two
130	   characters followed by two "=" padding characters, or
131       (3) the final quantum of encoding input is exactly 16 bits;
132           here, the final unit of encoded output will be three
133	   characters followed by one "=" padding character.
134   */
135
136int
137b64_ntop(src, srclength, target, targsize)
138	u_char const *src;
139	size_t srclength;
140	char *target;
141	size_t targsize;
142{
143	size_t datalength = 0;
144	u_char input[3] = { 0, 0, 0 };  /* make compiler happy */
145	u_char output[4];
146	size_t i;
147
148	assert(src != NULL);
149	assert(target != NULL);
150
151	while (2 < srclength) {
152		input[0] = *src++;
153		input[1] = *src++;
154		input[2] = *src++;
155		srclength -= 3;
156
157		output[0] = (u_int32_t)input[0] >> 2;
158		output[1] = ((u_int32_t)(input[0] & 0x03) << 4) +
159		    ((u_int32_t)input[1] >> 4);
160		output[2] = ((u_int32_t)(input[1] & 0x0f) << 2) +
161		    ((u_int32_t)input[2] >> 6);
162		output[3] = input[2] & 0x3f;
163		assert(output[0] < 64);
164		assert(output[1] < 64);
165		assert(output[2] < 64);
166		assert(output[3] < 64);
167
168		if (datalength + 4 > targsize)
169			return (-1);
170		target[datalength++] = Base64[output[0]];
171		target[datalength++] = Base64[output[1]];
172		target[datalength++] = Base64[output[2]];
173		target[datalength++] = Base64[output[3]];
174	}
175
176	/* Now we worry about padding. */
177	if (0 != srclength) {
178		/* Get what's left. */
179		input[0] = input[1] = input[2] = '\0';
180		for (i = 0; i < srclength; i++)
181			input[i] = *src++;
182
183		output[0] = (u_int32_t)input[0] >> 2;
184		output[1] = ((u_int32_t)(input[0] & 0x03) << 4) +
185		    ((u_int32_t)input[1] >> 4);
186		output[2] = ((u_int32_t)(input[1] & 0x0f) << 2) +
187		    ((u_int32_t)input[2] >> 6);
188		assert(output[0] < 64);
189		assert(output[1] < 64);
190		assert(output[2] < 64);
191
192		if (datalength + 4 > targsize)
193			return (-1);
194		target[datalength++] = Base64[output[0]];
195		target[datalength++] = Base64[output[1]];
196		if (srclength == 1)
197			target[datalength++] = Pad64;
198		else
199			target[datalength++] = Base64[output[2]];
200		target[datalength++] = Pad64;
201	}
202	if (datalength >= targsize)
203		return (-1);
204	target[datalength] = '\0';	/* Returned value doesn't count \0. */
205	return (datalength);
206}
207
208/* skips all whitespace anywhere.
209   converts characters, four at a time, starting at (or after)
210   src from base - 64 numbers into three 8 bit bytes in the target area.
211   it returns the number of data bytes stored at the target, or -1 on error.
212 */
213
214int
215b64_pton(src, target, targsize)
216	char const *src;
217	u_char *target;
218	size_t targsize;
219{
220	size_t tarindex;
221	int state, ch;
222	char *pos;
223
224	assert(src != NULL);
225	assert(target != NULL);
226
227	state = 0;
228	tarindex = 0;
229
230	while ((ch = (u_char) *src++) != '\0') {
231		if (isspace(ch))	/* Skip whitespace anywhere. */
232			continue;
233
234		if (ch == Pad64)
235			break;
236
237		pos = strchr(Base64, ch);
238		if (pos == 0) 		/* A non-base64 character. */
239			return (-1);
240
241		switch (state) {
242		case 0:
243			if (target) {
244				if (tarindex >= targsize)
245					return (-1);
246				target[tarindex] = (pos - Base64) << 2;
247			}
248			state = 1;
249			break;
250		case 1:
251			if (target) {
252				if (tarindex + 1 >= targsize)
253					return (-1);
254				target[tarindex] |=
255				    (u_int32_t)(pos - Base64) >> 4;
256				target[tarindex+1]  = ((pos - Base64) & 0x0f)
257							<< 4 ;
258			}
259			tarindex++;
260			state = 2;
261			break;
262		case 2:
263			if (target) {
264				if (tarindex + 1 >= targsize)
265					return (-1);
266				target[tarindex] |=
267					(u_int32_t)(pos - Base64) >> 2;
268				target[tarindex+1] = ((pos - Base64) & 0x03)
269							<< 6;
270			}
271			tarindex++;
272			state = 3;
273			break;
274		case 3:
275			if (target) {
276				if (tarindex >= targsize)
277					return (-1);
278				target[tarindex] |= (pos - Base64);
279			}
280			tarindex++;
281			state = 0;
282			break;
283		default:
284			abort();
285		}
286	}
287
288	/*
289	 * We are done decoding Base-64 chars.  Let's see if we ended
290	 * on a byte boundary, and/or with erroneous trailing characters.
291	 */
292
293	if (ch == Pad64) {		/* We got a pad char. */
294		ch = *src++;		/* Skip it, get next. */
295		switch (state) {
296		case 0:		/* Invalid = in first position */
297		case 1:		/* Invalid = in second position */
298			return (-1);
299
300		case 2:		/* Valid, means one byte of info */
301			/* Skip any number of spaces. */
302			for (; ch != '\0'; ch = (u_char) *src++)
303				if (!isspace(ch))
304					break;
305			/* Make sure there is another trailing = sign. */
306			if (ch != Pad64)
307				return (-1);
308			ch = *src++;		/* Skip the = */
309			/* Fall through to "single trailing =" case. */
310			/* FALLTHROUGH */
311
312		case 3:		/* Valid, means two bytes of info */
313			/*
314			 * We know this char is an =.  Is there anything but
315			 * whitespace after it?
316			 */
317			for (; ch != '\0'; ch = (u_char) *src++)
318				if (!isspace(ch))
319					return (-1);
320
321			/*
322			 * Now make sure for cases 2 and 3 that the "extra"
323			 * bits that slopped past the last full byte were
324			 * zeros.  If we don't check them, they become a
325			 * subliminal channel.
326			 */
327			if (target && target[tarindex] != 0)
328				return (-1);
329		}
330	} else {
331		/*
332		 * We ended by seeing the end of the string.  Make sure we
333		 * have no partial bytes lying around.
334		 */
335		if (state != 0)
336			return (-1);
337	}
338
339	return (tarindex);
340}
341