1/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6**      each line encodes 45 bytes (except possibly the last)
7**      First char encodes (binary) length, rest data
8**      each char encodes 6 bits, as follows:
9**      binary: 01234567 abcdefgh ijklmnop
10**      ascii:  012345 67abcd efghij klmnop
11**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12**      short binary data is zero-extended (so the bits are always in the
13**      right place), this does *not* reflect in the length.
14** base64:
15**      Line breaks are insignificant, but lines are at most 76 chars
16**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17**      is done via a table.
18**      Short binary data is filled (in ASCII) with '='.
19** hqx:
20**      File starts with introductory text, real data starts and ends
21**      with colons.
22**      Data consists of three similar parts: info, datafork, resourcefork.
23**      Each part is protected (at the end) with a 16-bit crc
24**      The binary data is run-length encoded, and then ascii-fied:
25**      binary: 01234567 abcdefgh ijklmnop
26**      ascii:  012345 67abcd efghij klmnop
27**      ASCII encoding is table-driven, see the code.
28**      Short binary data results in the runt ascii-byte being output with
29**      the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34**      Programs that encode binary data in ASCII are written in
35**      such a style that they are as unreadable as possible. Devices used
36**      include unnecessary global variables, burying important tables
37**      in unrelated sourcefiles, putting functions in include files,
38**      using seemingly-descriptive variable names for different purposes,
39**      calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
45**
46** Added support for quoted-printable encoding, based on rfc 1521 et al
47** quoted-printable encoding specifies that non printable characters (anything
48** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character.  It also specifies some other behavior to enable 8bit data
50** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
52**
53** Brandon Long, September 2001.
54*/
55
56#define PY_SSIZE_T_CLEAN
57
58#include "Python.h"
59#ifdef USE_ZLIB_CRC32
60#include "zlib.h"
61#endif
62
63static PyObject *Error;
64static PyObject *Incomplete;
65
66/*
67** hqx lookup table, ascii->binary.
68*/
69
70#define RUNCHAR 0x90
71
72#define DONE 0x7F
73#define SKIP 0x7E
74#define FAIL 0x7D
75
76static unsigned char table_a2b_hqx[256] = {
77/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
78/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
79/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
80/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
81/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
82/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
83/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
84/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
85/*              !     "     #     $     %     &     '   */
86/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
87/*        (     )     *     +     ,     -     .     /   */
88/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
89/*        0     1     2     3     4     5     6     7   */
90/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
91/*        8     9     :     ;     <     =     >     ?   */
92/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
93/*        @     A     B     C     D     E     F     G   */
94/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
95/*        H     I     J     K     L     M     N     O   */
96/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
97/*        P     Q     R     S     T     U     V     W   */
98/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
99/*        X     Y     Z     [     \     ]     ^     _   */
100/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
101/*        `     a     b     c     d     e     f     g   */
102/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
103/*        h     i     j     k     l     m     n     o   */
104/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
105/*        p     q     r     s     t     u     v     w   */
106/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
107/*        x     y     z     {     |     }     ~    ^?   */
108/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
109/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
110    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
112    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
113    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
114    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
115    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
116    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
117    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
118    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
119    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
120    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
121    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
122    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
123    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
124    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
125};
126
127static unsigned char table_b2a_hqx[] =
128"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
129
130static char table_a2b_base64[] = {
131    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
132    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
133    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
134    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
135    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
136    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
137    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
138    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
139};
140
141#define BASE64_PAD '='
142
143/* Max binary chunk size; limited only by available memory */
144#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)
145
146static unsigned char table_b2a_base64[] =
147"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
148
149
150
151static unsigned short crctab_hqx[256] = {
152    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
153    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
154    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
155    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
156    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
157    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
158    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
159    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
160    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
161    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
162    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
163    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
164    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
165    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
166    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
167    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
168    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
169    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
170    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
171    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
172    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
173    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
174    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
175    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
176    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
177    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
178    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
179    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
180    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
181    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
182    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
183    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
184};
185
186PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
187
188static PyObject *
189binascii_a2b_uu(PyObject *self, PyObject *args)
190{
191    Py_buffer pascii;
192    unsigned char *ascii_data, *bin_data;
193    int leftbits = 0;
194    unsigned char this_ch;
195    unsigned int leftchar = 0;
196    PyObject *rv;
197    Py_ssize_t ascii_len, bin_len;
198
199    if ( !PyArg_ParseTuple(args, "s*:a2b_uu", &pascii) )
200        return NULL;
201    ascii_data = pascii.buf;
202    ascii_len = pascii.len;
203
204    assert(ascii_len >= 0);
205
206    /* First byte: binary data length (in bytes) */
207    bin_len = (*ascii_data++ - ' ') & 077;
208    ascii_len--;
209
210    /* Allocate the buffer */
211    if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
212        PyBuffer_Release(&pascii);
213        return NULL;
214    }
215    bin_data = (unsigned char *)PyString_AS_STRING(rv);
216
217    for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
218        /* XXX is it really best to add NULs if there's no more data */
219        this_ch = (ascii_len > 0) ? *ascii_data : 0;
220        if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
221            /*
222            ** Whitespace. Assume some spaces got eaten at
223            ** end-of-line. (We check this later)
224            */
225            this_ch = 0;
226        } else {
227            /* Check the character for legality
228            ** The 64 in stead of the expected 63 is because
229            ** there are a few uuencodes out there that use
230            ** '`' as zero instead of space.
231            */
232            if ( this_ch < ' ' || this_ch > (' ' + 64)) {
233                PyErr_SetString(Error, "Illegal char");
234                PyBuffer_Release(&pascii);
235                Py_DECREF(rv);
236                return NULL;
237            }
238            this_ch = (this_ch - ' ') & 077;
239        }
240        /*
241        ** Shift it in on the low end, and see if there's
242        ** a byte ready for output.
243        */
244        leftchar = (leftchar << 6) | (this_ch);
245        leftbits += 6;
246        if ( leftbits >= 8 ) {
247            leftbits -= 8;
248            *bin_data++ = (leftchar >> leftbits) & 0xff;
249            leftchar &= ((1 << leftbits) - 1);
250            bin_len--;
251        }
252    }
253    /*
254    ** Finally, check that if there's anything left on the line
255    ** that it's whitespace only.
256    */
257    while( ascii_len-- > 0 ) {
258        this_ch = *ascii_data++;
259        /* Extra '`' may be written as padding in some cases */
260        if ( this_ch != ' ' && this_ch != ' '+64 &&
261             this_ch != '\n' && this_ch != '\r' ) {
262            PyErr_SetString(Error, "Trailing garbage");
263            PyBuffer_Release(&pascii);
264            Py_DECREF(rv);
265            return NULL;
266        }
267    }
268    PyBuffer_Release(&pascii);
269    return rv;
270}
271
272PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
273
274static PyObject *
275binascii_b2a_uu(PyObject *self, PyObject *args)
276{
277    Py_buffer pbin;
278    unsigned char *ascii_data, *bin_data;
279    int leftbits = 0;
280    unsigned char this_ch;
281    unsigned int leftchar = 0;
282    PyObject *rv;
283    Py_ssize_t bin_len;
284
285    if ( !PyArg_ParseTuple(args, "s*:b2a_uu", &pbin) )
286        return NULL;
287    bin_data = pbin.buf;
288    bin_len = pbin.len;
289    if ( bin_len > 45 ) {
290        /* The 45 is a limit that appears in all uuencode's */
291        PyErr_SetString(Error, "At most 45 bytes at once");
292        PyBuffer_Release(&pbin);
293        return NULL;
294    }
295
296    /* We're lazy and allocate to much (fixed up later) */
297    if ( (rv=PyString_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
298        PyBuffer_Release(&pbin);
299        return NULL;
300    }
301    ascii_data = (unsigned char *)PyString_AS_STRING(rv);
302
303    /* Store the length */
304    *ascii_data++ = ' ' + (bin_len & 077);
305
306    for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
307        /* Shift the data (or padding) into our buffer */
308        if ( bin_len > 0 )              /* Data */
309            leftchar = (leftchar << 8) | *bin_data;
310        else                            /* Padding */
311            leftchar <<= 8;
312        leftbits += 8;
313
314        /* See if there are 6-bit groups ready */
315        while ( leftbits >= 6 ) {
316            this_ch = (leftchar >> (leftbits-6)) & 0x3f;
317            leftbits -= 6;
318            *ascii_data++ = this_ch + ' ';
319        }
320    }
321    *ascii_data++ = '\n';       /* Append a courtesy newline */
322
323    /* rv is cleared on error */
324    (void)_PyString_Resize(&rv,
325                       (ascii_data -
326                        (unsigned char *)PyString_AS_STRING(rv)));
327    PyBuffer_Release(&pbin);
328    return rv;
329}
330
331
332static int
333binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
334{
335    /* Finds & returns the (num+1)th
336    ** valid character for base64, or -1 if none.
337    */
338
339    int ret = -1;
340    unsigned char c, b64val;
341
342    while ((slen > 0) && (ret == -1)) {
343        c = *s;
344        b64val = table_a2b_base64[c & 0x7f];
345        if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
346            if (num == 0)
347                ret = *s;
348            num--;
349        }
350
351        s++;
352        slen--;
353    }
354    return ret;
355}
356
357PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
358
359static PyObject *
360binascii_a2b_base64(PyObject *self, PyObject *args)
361{
362    Py_buffer pascii;
363    unsigned char *ascii_data, *bin_data;
364    int leftbits = 0;
365    unsigned char this_ch;
366    unsigned int leftchar = 0;
367    PyObject *rv;
368    Py_ssize_t ascii_len, bin_len;
369    int quad_pos = 0;
370
371    if ( !PyArg_ParseTuple(args, "s*:a2b_base64", &pascii) )
372        return NULL;
373    ascii_data = pascii.buf;
374    ascii_len = pascii.len;
375
376    assert(ascii_len >= 0);
377
378    if (ascii_len > PY_SSIZE_T_MAX - 3) {
379        PyBuffer_Release(&pascii);
380        return PyErr_NoMemory();
381    }
382
383    bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
384
385    /* Allocate the buffer */
386    if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
387        PyBuffer_Release(&pascii);
388        return NULL;
389    }
390    bin_data = (unsigned char *)PyString_AS_STRING(rv);
391    bin_len = 0;
392
393    for( ; ascii_len > 0; ascii_len--, ascii_data++) {
394        this_ch = *ascii_data;
395
396        if (this_ch > 0x7f ||
397            this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
398            continue;
399
400        /* Check for pad sequences and ignore
401        ** the invalid ones.
402        */
403        if (this_ch == BASE64_PAD) {
404            if ( (quad_pos < 2) ||
405                 ((quad_pos == 2) &&
406                  (binascii_find_valid(ascii_data, ascii_len, 1)
407                   != BASE64_PAD)) )
408            {
409                continue;
410            }
411            else {
412                /* A pad sequence means no more input.
413                ** We've already interpreted the data
414                ** from the quad at this point.
415                */
416                leftbits = 0;
417                break;
418            }
419        }
420
421        this_ch = table_a2b_base64[*ascii_data];
422        if ( this_ch == (unsigned char) -1 )
423            continue;
424
425        /*
426        ** Shift it in on the low end, and see if there's
427        ** a byte ready for output.
428        */
429        quad_pos = (quad_pos + 1) & 0x03;
430        leftchar = (leftchar << 6) | (this_ch);
431        leftbits += 6;
432
433        if ( leftbits >= 8 ) {
434            leftbits -= 8;
435            *bin_data++ = (leftchar >> leftbits) & 0xff;
436            bin_len++;
437            leftchar &= ((1 << leftbits) - 1);
438        }
439    }
440
441    if (leftbits != 0) {
442        PyBuffer_Release(&pascii);
443        PyErr_SetString(Error, "Incorrect padding");
444        Py_DECREF(rv);
445        return NULL;
446    }
447
448    /* And set string size correctly. If the result string is empty
449    ** (because the input was all invalid) return the shared empty
450    ** string instead; _PyString_Resize() won't do this for us.
451    */
452    if (bin_len > 0) {
453        /* rv is cleared on error */
454        (void)_PyString_Resize(&rv, bin_len);
455    }
456    else {
457        Py_DECREF(rv);
458        rv = PyString_FromStringAndSize("", 0);
459    }
460    PyBuffer_Release(&pascii);
461    return rv;
462}
463
464PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
465
466static PyObject *
467binascii_b2a_base64(PyObject *self, PyObject *args)
468{
469    Py_buffer pbuf;
470    unsigned char *ascii_data, *bin_data;
471    int leftbits = 0;
472    unsigned char this_ch;
473    unsigned int leftchar = 0;
474    PyObject *rv;
475    Py_ssize_t bin_len;
476
477    if ( !PyArg_ParseTuple(args, "s*:b2a_base64", &pbuf) )
478        return NULL;
479    bin_data = pbuf.buf;
480    bin_len = pbuf.len;
481
482    assert(bin_len >= 0);
483
484    if ( bin_len > BASE64_MAXBIN ) {
485        PyErr_SetString(Error, "Too much data for base64 line");
486        PyBuffer_Release(&pbuf);
487        return NULL;
488    }
489
490    /* We're lazy and allocate too much (fixed up later).
491       "+3" leaves room for up to two pad characters and a trailing
492       newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
493    if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
494        PyBuffer_Release(&pbuf);
495        return NULL;
496    }
497    ascii_data = (unsigned char *)PyString_AS_STRING(rv);
498
499    for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
500        /* Shift the data into our buffer */
501        leftchar = (leftchar << 8) | *bin_data;
502        leftbits += 8;
503
504        /* See if there are 6-bit groups ready */
505        while ( leftbits >= 6 ) {
506            this_ch = (leftchar >> (leftbits-6)) & 0x3f;
507            leftbits -= 6;
508            *ascii_data++ = table_b2a_base64[this_ch];
509        }
510    }
511    if ( leftbits == 2 ) {
512        *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
513        *ascii_data++ = BASE64_PAD;
514        *ascii_data++ = BASE64_PAD;
515    } else if ( leftbits == 4 ) {
516        *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
517        *ascii_data++ = BASE64_PAD;
518    }
519    *ascii_data++ = '\n';       /* Append a courtesy newline */
520
521    /* rv is cleared on error */
522    (void)_PyString_Resize(&rv,
523                       (ascii_data -
524                        (unsigned char *)PyString_AS_STRING(rv)));
525    PyBuffer_Release(&pbuf);
526    return rv;
527}
528
529PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
530
531static PyObject *
532binascii_a2b_hqx(PyObject *self, PyObject *args)
533{
534    Py_buffer pascii;
535    unsigned char *ascii_data, *bin_data;
536    int leftbits = 0;
537    unsigned char this_ch;
538    unsigned int leftchar = 0;
539    PyObject *rv;
540    Py_ssize_t len;
541    int done = 0;
542
543    if ( !PyArg_ParseTuple(args, "s*:a2b_hqx", &pascii) )
544        return NULL;
545    ascii_data = pascii.buf;
546    len = pascii.len;
547
548    assert(len >= 0);
549
550    if (len > PY_SSIZE_T_MAX - 2) {
551        PyBuffer_Release(&pascii);
552        return PyErr_NoMemory();
553    }
554
555    /* Allocate a string that is too big (fixed later)
556       Add two to the initial length to prevent interning which
557       would preclude subsequent resizing.  */
558    if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL ) {
559        PyBuffer_Release(&pascii);
560        return NULL;
561    }
562    bin_data = (unsigned char *)PyString_AS_STRING(rv);
563
564    for( ; len > 0 ; len--, ascii_data++ ) {
565        /* Get the byte and look it up */
566        this_ch = table_a2b_hqx[*ascii_data];
567        if ( this_ch == SKIP )
568            continue;
569        if ( this_ch == FAIL ) {
570            PyErr_SetString(Error, "Illegal char");
571            PyBuffer_Release(&pascii);
572            Py_DECREF(rv);
573            return NULL;
574        }
575        if ( this_ch == DONE ) {
576            /* The terminating colon */
577            done = 1;
578            break;
579        }
580
581        /* Shift it into the buffer and see if any bytes are ready */
582        leftchar = (leftchar << 6) | (this_ch);
583        leftbits += 6;
584        if ( leftbits >= 8 ) {
585            leftbits -= 8;
586            *bin_data++ = (leftchar >> leftbits) & 0xff;
587            leftchar &= ((1 << leftbits) - 1);
588        }
589    }
590
591    if ( leftbits && !done ) {
592        PyErr_SetString(Incomplete,
593                        "String has incomplete number of bytes");
594        PyBuffer_Release(&pascii);
595        Py_DECREF(rv);
596        return NULL;
597    }
598    /* rv is cleared on error */
599    if (_PyString_Resize(&rv,
600                       (bin_data -
601                        (unsigned char *)PyString_AS_STRING(rv))) == 0) {
602        PyObject *rrv = Py_BuildValue("Oi", rv, done);
603        PyBuffer_Release(&pascii);
604        Py_DECREF(rv);
605        return rrv;
606    }
607
608    PyBuffer_Release(&pascii);
609    return NULL;
610}
611
612PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
613
614static PyObject *
615binascii_rlecode_hqx(PyObject *self, PyObject *args)
616{
617    Py_buffer pbuf;
618    unsigned char *in_data, *out_data;
619    PyObject *rv;
620    unsigned char ch;
621    Py_ssize_t in, inend, len;
622
623    if ( !PyArg_ParseTuple(args, "s*:rlecode_hqx", &pbuf) )
624        return NULL;
625    in_data = pbuf.buf;
626    len = pbuf.len;
627
628    assert(len >= 0);
629
630    if (len > PY_SSIZE_T_MAX / 2 - 2) {
631        PyBuffer_Release(&pbuf);
632        return PyErr_NoMemory();
633    }
634
635    /* Worst case: output is twice as big as input (fixed later) */
636    if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
637        PyBuffer_Release(&pbuf);
638        return NULL;
639    }
640    out_data = (unsigned char *)PyString_AS_STRING(rv);
641
642    for( in=0; in<len; in++) {
643        ch = in_data[in];
644        if ( ch == RUNCHAR ) {
645            /* RUNCHAR. Escape it. */
646            *out_data++ = RUNCHAR;
647            *out_data++ = 0;
648        } else {
649            /* Check how many following are the same */
650            for(inend=in+1;
651                inend<len && in_data[inend] == ch &&
652                    inend < in+255;
653                inend++) ;
654            if ( inend - in > 3 ) {
655                /* More than 3 in a row. Output RLE. */
656                *out_data++ = ch;
657                *out_data++ = RUNCHAR;
658                *out_data++ = inend-in;
659                in = inend-1;
660            } else {
661                /* Less than 3. Output the byte itself */
662                *out_data++ = ch;
663            }
664        }
665    }
666    /* rv is cleared on error */
667    (void)_PyString_Resize(&rv,
668                       (out_data -
669                        (unsigned char *)PyString_AS_STRING(rv)));
670    PyBuffer_Release(&pbuf);
671    return rv;
672}
673
674PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
675
676static PyObject *
677binascii_b2a_hqx(PyObject *self, PyObject *args)
678{
679    Py_buffer pbin;
680    unsigned char *ascii_data, *bin_data;
681    int leftbits = 0;
682    unsigned char this_ch;
683    unsigned int leftchar = 0;
684    PyObject *rv;
685    Py_ssize_t len;
686
687    if ( !PyArg_ParseTuple(args, "s*:b2a_hqx", &pbin) )
688        return NULL;
689    bin_data = pbin.buf;
690    len = pbin.len;
691
692    assert(len >= 0);
693
694    if (len > PY_SSIZE_T_MAX / 2 - 2) {
695        PyBuffer_Release(&pbin);
696        return PyErr_NoMemory();
697    }
698
699    /* Allocate a buffer that is at least large enough */
700    if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
701        PyBuffer_Release(&pbin);
702        return NULL;
703    }
704    ascii_data = (unsigned char *)PyString_AS_STRING(rv);
705
706    for( ; len > 0 ; len--, bin_data++ ) {
707        /* Shift into our buffer, and output any 6bits ready */
708        leftchar = (leftchar << 8) | *bin_data;
709        leftbits += 8;
710        while ( leftbits >= 6 ) {
711            this_ch = (leftchar >> (leftbits-6)) & 0x3f;
712            leftbits -= 6;
713            *ascii_data++ = table_b2a_hqx[this_ch];
714        }
715    }
716    /* Output a possible runt byte */
717    if ( leftbits ) {
718        leftchar <<= (6-leftbits);
719        *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
720    }
721    /* rv is cleared on error */
722    (void)_PyString_Resize(&rv,
723                       (ascii_data -
724                        (unsigned char *)PyString_AS_STRING(rv)));
725    PyBuffer_Release(&pbin);
726    return rv;
727}
728
729PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
730
731static PyObject *
732binascii_rledecode_hqx(PyObject *self, PyObject *args)
733{
734    Py_buffer pin;
735    unsigned char *in_data, *out_data;
736    unsigned char in_byte, in_repeat;
737    PyObject *rv;
738    Py_ssize_t in_len, out_len, out_len_left;
739
740    if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
741        return NULL;
742    in_data = pin.buf;
743    in_len = pin.len;
744
745    assert(in_len >= 0);
746
747    /* Empty string is a special case */
748    if ( in_len == 0 ) {
749        PyBuffer_Release(&pin);
750        return PyString_FromStringAndSize("", 0);
751    }
752    else if (in_len > PY_SSIZE_T_MAX / 2) {
753        PyBuffer_Release(&pin);
754        return PyErr_NoMemory();
755    }
756
757    /* Allocate a buffer of reasonable size. Resized when needed */
758    out_len = in_len*2;
759    if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) {
760        PyBuffer_Release(&pin);
761        return NULL;
762    }
763    out_len_left = out_len;
764    out_data = (unsigned char *)PyString_AS_STRING(rv);
765
766    /*
767    ** We need two macros here to get/put bytes and handle
768    ** end-of-buffer for input and output strings.
769    */
770#define INBYTE(b) \
771    do { \
772             if ( --in_len < 0 ) { \
773                       PyErr_SetString(Incomplete, ""); \
774                       Py_DECREF(rv); \
775                       PyBuffer_Release(&pin); \
776                       return NULL; \
777             } \
778             b = *in_data++; \
779    } while(0)
780
781#define OUTBYTE(b) \
782    do { \
783             if ( --out_len_left < 0 ) { \
784                      if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
785                      if (_PyString_Resize(&rv, 2*out_len) < 0) \
786                        { PyBuffer_Release(&pin); return NULL; } \
787                      out_data = (unsigned char *)PyString_AS_STRING(rv) \
788                                                             + out_len; \
789                      out_len_left = out_len-1; \
790                      out_len = out_len * 2; \
791             } \
792             *out_data++ = b; \
793    } while(0)
794
795        /*
796        ** Handle first byte separately (since we have to get angry
797        ** in case of an orphaned RLE code).
798        */
799        INBYTE(in_byte);
800
801    if (in_byte == RUNCHAR) {
802        INBYTE(in_repeat);
803        if (in_repeat != 0) {
804            /* Note Error, not Incomplete (which is at the end
805            ** of the string only). This is a programmer error.
806            */
807            PyErr_SetString(Error, "Orphaned RLE code at start");
808            PyBuffer_Release(&pin);
809            Py_DECREF(rv);
810            return NULL;
811        }
812        OUTBYTE(RUNCHAR);
813    } else {
814        OUTBYTE(in_byte);
815    }
816
817    while( in_len > 0 ) {
818        INBYTE(in_byte);
819
820        if (in_byte == RUNCHAR) {
821            INBYTE(in_repeat);
822            if ( in_repeat == 0 ) {
823                /* Just an escaped RUNCHAR value */
824                OUTBYTE(RUNCHAR);
825            } else {
826                /* Pick up value and output a sequence of it */
827                in_byte = out_data[-1];
828                while ( --in_repeat > 0 )
829                    OUTBYTE(in_byte);
830            }
831        } else {
832            /* Normal byte */
833            OUTBYTE(in_byte);
834        }
835    }
836    /* rv is cleared on error */
837    (void)_PyString_Resize(&rv,
838                       (out_data -
839                        (unsigned char *)PyString_AS_STRING(rv)));
840    PyBuffer_Release(&pin);
841    return rv;
842}
843
844PyDoc_STRVAR(doc_crc_hqx,
845"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
846
847static PyObject *
848binascii_crc_hqx(PyObject *self, PyObject *args)
849{
850    Py_buffer pin;
851    unsigned char *bin_data;
852    unsigned int crc;
853    Py_ssize_t len;
854
855    if ( !PyArg_ParseTuple(args, "s*i:crc_hqx", &pin, &crc) )
856        return NULL;
857    bin_data = pin.buf;
858    len = pin.len;
859
860    while(len-- > 0) {
861        crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
862    }
863
864    PyBuffer_Release(&pin);
865    return Py_BuildValue("i", crc);
866}
867
868PyDoc_STRVAR(doc_crc32,
869"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
870
871#ifdef USE_ZLIB_CRC32
872/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
873static PyObject *
874binascii_crc32(PyObject *self, PyObject *args)
875{
876    unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
877    Py_buffer pbuf;
878    Byte *buf;
879    Py_ssize_t len;
880    int signed_val;
881
882    if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val))
883        return NULL;
884    /* In Python 2.x we return a signed integer regardless of native platform
885     * long size (the 32bit unsigned long is treated as 32-bit signed and sign
886     * extended into a 64-bit long inside the integer object).  3.0 does the
887     * right thing and returns unsigned. http://bugs.python.org/issue1202 */
888    buf = (Byte*)pbuf.buf;
889    len = pbuf.len;
890    signed_val = crc32(crc32val, buf, len);
891    PyBuffer_Release(&pbuf);
892    return PyInt_FromLong(signed_val);
893}
894#else  /* USE_ZLIB_CRC32 */
895/*  Crc - 32 BIT ANSI X3.66 CRC checksum files
896    Also known as: ISO 3307
897**********************************************************************|
898*                                                                    *|
899* Demonstration program to compute the 32-bit CRC used as the frame  *|
900* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
901* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
902* protocol).  The 32-bit FCS was added via the Federal Register,     *|
903* 1 June 1982, p.23798.  I presume but don't know for certain that   *|
904* this polynomial is or will be included in CCITT V.41, which        *|
905* defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
906* PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
907* errors by a factor of 10^-5 over 16-bit FCS.                       *|
908*                                                                    *|
909**********************************************************************|
910
911 Copyright (C) 1986 Gary S. Brown.  You may use this program, or
912 code or tables extracted from it, as desired without restriction.
913
914 First, the polynomial itself and its table of feedback terms.  The
915 polynomial is
916 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
917 Note that we take it "backwards" and put the highest-order term in
918 the lowest-order bit.  The X^32 term is "implied"; the LSB is the
919 X^31 term, etc.  The X^0 term (usually shown as "+1") results in
920 the MSB being 1.
921
922 Note that the usual hardware shift register implementation, which
923 is what we're using (we're merely optimizing it by doing eight-bit
924 chunks at a time) shifts bits into the lowest-order term.  In our
925 implementation, that means shifting towards the right.  Why do we
926 do it this way?  Because the calculated CRC must be transmitted in
927 order from highest-order term to lowest-order term.  UARTs transmit
928 characters in order from LSB to MSB.  By storing the CRC this way,
929 we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
931 by bit from highest- to lowest-order term without requiring any bit
932 shuffling on our part.  Reception works similarly.
933
934 The feedback terms table consists of 256, 32-bit entries.  Notes:
935
936  1. The table can be generated at runtime if desired; code to do so
937     is shown later.  It might not be obvious, but the feedback
938     terms simply represent the results of eight shift/xor opera-
939     tions for all combinations of data and CRC register values.
940
941  2. The CRC accumulation logic is the same for all CRC polynomials,
942     be they sixteen or thirty-two bits wide.  You simply choose the
943     appropriate table.  Alternatively, because the table can be
944     generated at runtime, you can start by generating the table for
945     the polynomial in question and use exactly the same "updcrc",
946     if your application needn't simultaneously handle two CRC
947     polynomials.  (Note, however, that XMODEM is strange.)
948
949  3. For 16-bit CRCs, the table entries need be only 16 bits wide;
950     of course, 32-bit entries work OK if the high 16 bits are zero.
951
952  4. The values must be right-shifted by eight bits by the "updcrc"
953     logic; the shift must be unsigned (bring in zeroes).  On some
954     hardware you could probably optimize the shift in assembler by
955     using byte-swap instructions.
956********************************************************************/
957
/* Feedback-term lookup table for the bit-reversed CRC-32 polynomial
 * 0xedb88320: entry i is the result of running the byte value i through
 * eight shift/xor steps.  The table is only ever read, so it is const. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1012
1013static PyObject *
1014binascii_crc32(PyObject *self, PyObject *args)
1015{ /* By Jim Ahlstrom; All rights transferred to CNRI */
1016    Py_buffer pbin;
1017    unsigned char *bin_data;
1018    unsigned int crc = 0U;      /* initial value of CRC */
1019    Py_ssize_t len;
1020    int result;
1021
1022    if ( !PyArg_ParseTuple(args, "s*|I:crc32", &pbin, &crc) )
1023        return NULL;
1024    bin_data = pbin.buf;
1025    len = pbin.len;
1026
1027    crc = ~ crc;
1028    while (len-- > 0)
1029        crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
1030        /* Note:  (crc >> 8) MUST zero fill on left */
1031
1032    result = (int)(crc ^ 0xFFFFFFFFU);
1033    PyBuffer_Release(&pbin);
1034    return PyInt_FromLong(result);
1035}
1036#endif  /* USE_ZLIB_CRC32 */
1037
1038
1039static PyObject *
1040binascii_hexlify(PyObject *self, PyObject *args)
1041{
1042    Py_buffer parg;
1043    char* argbuf;
1044    Py_ssize_t arglen;
1045    PyObject *retval;
1046    char* retbuf;
1047    Py_ssize_t i, j;
1048
1049    if (!PyArg_ParseTuple(args, "s*:b2a_hex", &parg))
1050        return NULL;
1051    argbuf = parg.buf;
1052    arglen = parg.len;
1053
1054    assert(arglen >= 0);
1055    if (arglen > PY_SSIZE_T_MAX / 2) {
1056        PyBuffer_Release(&parg);
1057        return PyErr_NoMemory();
1058    }
1059
1060    retval = PyString_FromStringAndSize(NULL, arglen*2);
1061    if (!retval) {
1062        PyBuffer_Release(&parg);
1063        return NULL;
1064    }
1065    retbuf = PyString_AS_STRING(retval);
1066
1067    /* make hex version of string, taken from shamodule.c */
1068    for (i=j=0; i < arglen; i++) {
1069        char c;
1070        c = (argbuf[i] >> 4) & 0xf;
1071        c = (c>9) ? c+'a'-10 : c + '0';
1072        retbuf[j++] = c;
1073        c = argbuf[i] & 0xf;
1074        c = (c>9) ? c+'a'-10 : c + '0';
1075        retbuf[j++] = c;
1076    }
1077    PyBuffer_Release(&parg);
1078    return retval;
1079}
1080
1081PyDoc_STRVAR(doc_hexlify,
1082"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1083\n\
1084This function is also available as \"hexlify()\".");
1085
1086
/*
 * Convert one hexadecimal digit to its numeric value.
 *
 * c: character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
 * Returns the value 0..15, or -1 if c is not a hexadecimal digit.
 *
 * Only explicit range comparisons are used.  The C library's isdigit()
 * is deliberately avoided: where it accepts a byte outside '0'..'9',
 * the subtraction would yield a value above 15 instead of the -1 that
 * callers test for.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1100
1101
1102static PyObject *
1103binascii_unhexlify(PyObject *self, PyObject *args)
1104{
1105    Py_buffer parg;
1106    char* argbuf;
1107    Py_ssize_t arglen;
1108    PyObject *retval;
1109    char* retbuf;
1110    Py_ssize_t i, j;
1111
1112    if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
1113        return NULL;
1114    argbuf = parg.buf;
1115    arglen = parg.len;
1116
1117    assert(arglen >= 0);
1118
1119    /* XXX What should we do about strings with an odd length?  Should
1120     * we add an implicit leading zero, or a trailing zero?  For now,
1121     * raise an exception.
1122     */
1123    if (arglen % 2) {
1124        PyBuffer_Release(&parg);
1125        PyErr_SetString(PyExc_TypeError, "Odd-length string");
1126        return NULL;
1127    }
1128
1129    retval = PyString_FromStringAndSize(NULL, (arglen/2));
1130    if (!retval) {
1131        PyBuffer_Release(&parg);
1132        return NULL;
1133    }
1134    retbuf = PyString_AS_STRING(retval);
1135
1136    for (i=j=0; i < arglen; i += 2) {
1137        int top = to_int(Py_CHARMASK(argbuf[i]));
1138        int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1139        if (top == -1 || bot == -1) {
1140            PyErr_SetString(PyExc_TypeError,
1141                            "Non-hexadecimal digit found");
1142            goto finally;
1143        }
1144        retbuf[j++] = (top << 4) + bot;
1145    }
1146    PyBuffer_Release(&parg);
1147    return retval;
1148
1149  finally:
1150    PyBuffer_Release(&parg);
1151    Py_DECREF(retval);
1152    return NULL;
1153}
1154
1155PyDoc_STRVAR(doc_unhexlify,
1156"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1157\n\
1158hexstr must contain an even number of hex digits (upper or lower case).\n\
1159This function is also available as \"unhexlify()\"");
1160
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when
 * the character is not one.  Upper- and lower-case letters map to the same
 * values.  The table is read-only, hence const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of character c.  No range check is performed:
 * the caller must guarantee that c is below 128. */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit used by the quoted-printable encoder when deciding
 * where to insert soft line breaks. */
#define MAXLINESIZE 76
1175
1176PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1177
1178static PyObject*
1179binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1180{
1181    Py_ssize_t in, out;
1182    char ch;
1183    Py_buffer pdata;
1184    unsigned char *data, *odata;
1185    Py_ssize_t datalen = 0;
1186    PyObject *rv;
1187    static char *kwlist[] = {"data", "header", NULL};
1188    int header = 0;
1189
1190    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
1191          &header))
1192        return NULL;
1193    data = pdata.buf;
1194    datalen = pdata.len;
1195
1196    /* We allocate the output same size as input, this is overkill.
1197     * The previous implementation used calloc() so we'll zero out the
1198     * memory here too, since PyMem_Malloc() does not guarantee that.
1199     */
1200    odata = (unsigned char *) PyMem_Malloc(datalen);
1201    if (odata == NULL) {
1202        PyBuffer_Release(&pdata);
1203        PyErr_NoMemory();
1204        return NULL;
1205    }
1206    memset(odata, 0, datalen);
1207
1208    in = out = 0;
1209    while (in < datalen) {
1210        if (data[in] == '=') {
1211            in++;
1212            if (in >= datalen) break;
1213            /* Soft line breaks */
1214            if ((data[in] == '\n') || (data[in] == '\r')) {
1215                if (data[in] != '\n') {
1216                    while (in < datalen && data[in] != '\n') in++;
1217                }
1218                if (in < datalen) in++;
1219            }
1220            else if (data[in] == '=') {
1221                /* broken case from broken python qp */
1222                odata[out++] = '=';
1223                in++;
1224            }
1225            else if ((in + 1 < datalen) &&
1226                     ((data[in] >= 'A' && data[in] <= 'F') ||
1227                      (data[in] >= 'a' && data[in] <= 'f') ||
1228                      (data[in] >= '0' && data[in] <= '9')) &&
1229                     ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1230                      (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1231                      (data[in+1] >= '0' && data[in+1] <= '9'))) {
1232                /* hexval */
1233                ch = hexval(data[in]) << 4;
1234                in++;
1235                ch |= hexval(data[in]);
1236                in++;
1237                odata[out++] = ch;
1238            }
1239            else {
1240              odata[out++] = '=';
1241            }
1242        }
1243        else if (header && data[in] == '_') {
1244            odata[out++] = ' ';
1245            in++;
1246        }
1247        else {
1248            odata[out] = data[in];
1249            in++;
1250            out++;
1251        }
1252    }
1253    if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1254        PyBuffer_Release(&pdata);
1255        PyMem_Free(odata);
1256        return NULL;
1257    }
1258    PyBuffer_Release(&pdata);
1259    PyMem_Free(odata);
1260    return rv;
1261}
1262
/*
 * Write the two uppercase hexadecimal digits of `ch` to s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
1273
1274PyDoc_STRVAR(doc_b2a_qp,
1275"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1276 Encode a string using quoted-printable encoding. \n\
1277\n\
1278On encoding, when istext is set, newlines are not encoded, and white \n\
1279space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1280both encoded.  When quotetabs is set, space and tabs are encoded.");
1281
1282/* XXX: This is ridiculously complicated to be backward compatible
1283 * (mostly) with the quopri module.  It doesn't re-create the quopri
1284 * module bug where text ending in CRLF has the CR encoded */
1285static PyObject*
1286binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1287{
1288    Py_ssize_t in, out;
1289    Py_buffer pdata;
1290    unsigned char *data, *odata;
1291    Py_ssize_t datalen = 0, odatalen = 0;
1292    PyObject *rv;
1293    unsigned int linelen = 0;
1294    static char *kwlist[] = {"data", "quotetabs", "istext",
1295                                   "header", NULL};
1296    int istext = 1;
1297    int quotetabs = 0;
1298    int header = 0;
1299    unsigned char ch;
1300    int crlf = 0;
1301    unsigned char *p;
1302
1303    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|iii", kwlist, &pdata,
1304          &quotetabs, &istext, &header))
1305        return NULL;
1306    data = pdata.buf;
1307    datalen = pdata.len;
1308
1309    /* See if this string is using CRLF line ends */
1310    /* XXX: this function has the side effect of converting all of
1311     * the end of lines to be the same depending on this detection
1312     * here */
1313    p = (unsigned char *) memchr(data, '\n', datalen);
1314    if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1315        crlf = 1;
1316
1317    /* First, scan to see how many characters need to be encoded */
1318    in = 0;
1319    while (in < datalen) {
1320        Py_ssize_t delta = 0;
1321        if ((data[in] > 126) ||
1322            (data[in] == '=') ||
1323            (header && data[in] == '_') ||
1324            ((data[in] == '.') && (linelen == 0) &&
1325             (in + 1 == datalen || data[in+1] == '\n' ||
1326              data[in+1] == '\r' || data[in+1] == 0)) ||
1327            (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1328            ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1329            ((data[in] < 33) &&
1330             (data[in] != '\r') && (data[in] != '\n') &&
1331             (quotetabs ||
1332            (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1333        {
1334            if ((linelen + 3) >= MAXLINESIZE) {
1335                linelen = 0;
1336                if (crlf)
1337                    delta += 3;
1338                else
1339                    delta += 2;
1340            }
1341            linelen += 3;
1342            delta += 3;
1343            in++;
1344        }
1345        else {
1346            if (istext &&
1347                ((data[in] == '\n') ||
1348                 ((in+1 < datalen) && (data[in] == '\r') &&
1349                 (data[in+1] == '\n'))))
1350            {
1351                linelen = 0;
1352                /* Protect against whitespace on end of line */
1353                if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1354                    delta += 2;
1355                if (crlf)
1356                    delta += 2;
1357                else
1358                    delta += 1;
1359                if (data[in] == '\r')
1360                    in += 2;
1361                else
1362                    in++;
1363            }
1364            else {
1365                if ((in + 1 != datalen) &&
1366                    (data[in+1] != '\n') &&
1367                    (linelen + 1) >= MAXLINESIZE) {
1368                    linelen = 0;
1369                    if (crlf)
1370                        delta += 3;
1371                    else
1372                        delta += 2;
1373                }
1374                linelen++;
1375                delta++;
1376                in++;
1377            }
1378        }
1379        if (PY_SSIZE_T_MAX - delta < odatalen) {
1380            PyBuffer_Release(&pdata);
1381            PyErr_NoMemory();
1382            return NULL;
1383        }
1384        odatalen += delta;
1385    }
1386
1387    /* We allocate the output same size as input, this is overkill.
1388     * The previous implementation used calloc() so we'll zero out the
1389     * memory here too, since PyMem_Malloc() does not guarantee that.
1390     */
1391    odata = (unsigned char *) PyMem_Malloc(odatalen);
1392    if (odata == NULL) {
1393        PyBuffer_Release(&pdata);
1394        PyErr_NoMemory();
1395        return NULL;
1396    }
1397    memset(odata, 0, odatalen);
1398
1399    in = out = linelen = 0;
1400    while (in < datalen) {
1401        if ((data[in] > 126) ||
1402            (data[in] == '=') ||
1403            (header && data[in] == '_') ||
1404            ((data[in] == '.') && (linelen == 0) &&
1405             (in + 1 == datalen || data[in+1] == '\n' ||
1406              data[in+1] == '\r' || data[in+1] == 0)) ||
1407            (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1408            ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1409            ((data[in] < 33) &&
1410             (data[in] != '\r') && (data[in] != '\n') &&
1411             (quotetabs || ((data[in] != '\t') && (data[in] != ' ')))))
1412        {
1413            if ((linelen + 3 )>= MAXLINESIZE) {
1414                odata[out++] = '=';
1415                if (crlf) odata[out++] = '\r';
1416                odata[out++] = '\n';
1417                linelen = 0;
1418            }
1419            odata[out++] = '=';
1420            to_hex(data[in], &odata[out]);
1421            out += 2;
1422            in++;
1423            linelen += 3;
1424        }
1425        else {
1426            if (istext &&
1427                ((data[in] == '\n') ||
1428                 ((in+1 < datalen) && (data[in] == '\r') &&
1429                 (data[in+1] == '\n'))))
1430            {
1431                linelen = 0;
1432                /* Protect against whitespace on end of line */
1433                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1434                    ch = odata[out-1];
1435                    odata[out-1] = '=';
1436                    to_hex(ch, &odata[out]);
1437                    out += 2;
1438                }
1439
1440                if (crlf) odata[out++] = '\r';
1441                odata[out++] = '\n';
1442                if (data[in] == '\r')
1443                    in += 2;
1444                else
1445                    in++;
1446            }
1447            else {
1448                if ((in + 1 != datalen) &&
1449                    (data[in+1] != '\n') &&
1450                    (linelen + 1) >= MAXLINESIZE) {
1451                    odata[out++] = '=';
1452                    if (crlf) odata[out++] = '\r';
1453                    odata[out++] = '\n';
1454                    linelen = 0;
1455                }
1456                linelen++;
1457                if (header && data[in] == ' ') {
1458                    odata[out++] = '_';
1459                    in++;
1460                }
1461                else {
1462                    odata[out++] = data[in++];
1463                }
1464            }
1465        }
1466    }
1467    if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1468        PyBuffer_Release(&pdata);
1469        PyMem_Free(odata);
1470        return NULL;
1471    }
1472    PyBuffer_Release(&pdata);
1473    PyMem_Free(odata);
1474    return rv;
1475}
1476
1477/* List of functions defined in the module */
1478
1479static struct PyMethodDef binascii_module_methods[] = {
1480    {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1481    {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1482    {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1483    {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1484    {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1485    {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1486    {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1487    {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1488    {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1489    {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1490    {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1491    {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1492     doc_rledecode_hqx},
1493    {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1494    {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1495    {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1496      doc_a2b_qp},
1497    {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1498      doc_b2a_qp},
1499    {NULL, NULL}                             /* sentinel */
1500};
1501
1502
1503/* Initialization function for the module (*must* be called initbinascii) */
1504PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1505
1506PyMODINIT_FUNC
1507initbinascii(void)
1508{
1509    PyObject *m, *d, *x;
1510
1511    /* Create the module and add the functions */
1512    m = Py_InitModule("binascii", binascii_module_methods);
1513    if (m == NULL)
1514        return;
1515
1516    d = PyModule_GetDict(m);
1517    x = PyString_FromString(doc_binascii);
1518    PyDict_SetItemString(d, "__doc__", x);
1519    Py_XDECREF(x);
1520
1521    Error = PyErr_NewException("binascii.Error", NULL, NULL);
1522    PyDict_SetItemString(d, "Error", Error);
1523    Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1524    PyDict_SetItemString(d, "Incomplete", Incomplete);
1525}
1526