1/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002  Python Software Foundation; All Rights Reserved
7
8*/
9
10#include "Python.h"
11#include <stdio.h>
12#include <bzlib.h>
13#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module.  NOTE(review): presumably exported
 * as the module's __author__ attribute by the init function, which is not
 * visible in this part of the file -- confirm. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
    Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/* Our very own off_t-like type, 64-bit if possible.
 * Copied from Objects/fileobject.c.
 *
 * Selection order:
 *   - no large file support configured: plain off_t;
 *   - large file support and off_t is at least 8 bytes: off_t;
 *   - large file support and fpos_t is at least 8 bytes: fpos_t;
 *   - otherwise the build is rejected.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character buffer of a Python string object.  The argument is cast,
 * not type-checked. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0     /* I/O methods raise ValueError */
#define MODE_READ     1     /* open for reading, more data may follow */
#define MODE_READ_EOF 2     /* reading, BZ_STREAM_END already seen */
#define MODE_WRITE    3     /* open for writing */


/* When bzlib.h does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
 * names used throughout this file onto the unprefixed ones.
 * NOTE(review): presumably this targets old libbz2 releases whose API had
 * no BZ2_ prefix -- confirm against the bzip2 change log. */
#ifndef BZ_CONFIG_ERROR

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#endif /* ! BZ_CONFIG_ERROR */


/* Per-object locking.  ACQUIRE_LOCK first tries a non-blocking acquire;
 * only if that fails does it release the interpreter lock and block, so
 * the uncontended case never gives up the interpreter lock.  Both macros
 * expand to nothing in builds without thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock(obj->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock(obj->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Smaller of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
83
84/* ===================================================================== */
85/* Structure definitions. */
86
/* Instance layout of a BZ2File object. */
typedef struct {
    PyObject_HEAD
    PyObject *file;             /* NOTE(review): presumably the underlying
                                   Python file object wrapped by fp; set
                                   outside this part of the file */

    char* f_buf;                /* Allocated readahead buffer */
    char* f_bufend;             /* Points after last occupied position */
    char* f_bufptr;             /* Current buffer position */

    int f_softspace;            /* Flag used by 'print' command */

    int f_univ_newline;         /* Handle any newline convention */
    int f_newlinetypes;         /* Types of newlines seen (NEWLINE_* bits) */
    int f_skipnextlf;           /* Skip next \n */

    BZFILE *fp;                 /* libbz2 stream handle used for all I/O */
    int mode;                   /* One of the MODE_* constants */
    Py_off_t pos;               /* Bytes read or written so far */
    Py_off_t size;              /* Set to pos once end of stream is reached;
                                   seek() treats -1 as "not yet known" */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Taken by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2FileObject;
109
/* Instance layout of a compressor object.  The methods that use these
 * fields are not in this part of the file. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;              /* libbz2 low-level stream state */
    int running;                /* NOTE(review): presumably non-zero while
                                   the stream is initialised -- confirm */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Taken by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2CompObject;
118
/* Instance layout of a decompressor object.  The methods that use these
 * fields are not in this part of the file. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;              /* libbz2 low-level stream state */
    int running;                /* NOTE(review): presumably non-zero while
                                   the stream is initialised -- confirm */
    PyObject *unused_data;      /* NOTE(review): presumably input found
                                   after the end of the compressed stream
                                   -- confirm */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Taken by ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2DecompObject;
128
129/* ===================================================================== */
130/* Utility functions. */
131
132/* Refuse regular I/O if there's data in the iteration-buffer.
133 * Mixing them would cause data to arrive out of order, as the read*
134 * methods don't use the iteration buffer. */
135static int
136check_iterbuffered(BZ2FileObject *f)
137{
138    if (f->f_buf != NULL &&
139        (f->f_bufend - f->f_bufptr) > 0 &&
140        f->f_buf[0] != '\0') {
141        PyErr_SetString(PyExc_ValueError,
142            "Mixing iteration and read methods would lose data");
143        return -1;
144    }
145    return 0;
146}
147
148static int
149Util_CatchBZ2Error(int bzerror)
150{
151    int ret = 0;
152    switch(bzerror) {
153        case BZ_OK:
154        case BZ_STREAM_END:
155            break;
156
157#ifdef BZ_CONFIG_ERROR
158        case BZ_CONFIG_ERROR:
159            PyErr_SetString(PyExc_SystemError,
160                            "the bz2 library was not compiled "
161                            "correctly");
162            ret = 1;
163            break;
164#endif
165
166        case BZ_PARAM_ERROR:
167            PyErr_SetString(PyExc_ValueError,
168                            "the bz2 library has received wrong "
169                            "parameters");
170            ret = 1;
171            break;
172
173        case BZ_MEM_ERROR:
174            PyErr_NoMemory();
175            ret = 1;
176            break;
177
178        case BZ_DATA_ERROR:
179        case BZ_DATA_ERROR_MAGIC:
180            PyErr_SetString(PyExc_IOError, "invalid data stream");
181            ret = 1;
182            break;
183
184        case BZ_IO_ERROR:
185            PyErr_SetString(PyExc_IOError, "unknown IO error");
186            ret = 1;
187            break;
188
189        case BZ_UNEXPECTED_EOF:
190            PyErr_SetString(PyExc_EOFError,
191                            "compressed file ended before the "
192                            "logical end-of-stream was detected");
193            ret = 1;
194            break;
195
196        case BZ_SEQUENCE_ERROR:
197            PyErr_SetString(PyExc_RuntimeError,
198                            "wrong sequence of bz2 library "
199                            "commands used");
200            ret = 1;
201            break;
202    }
203    return ret;
204}
205
/* Size of the on-stack scratch buffers used by readlines() and seek():
 * the stdio BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif
211
/* Derived from Python's fileobject.c:new_buffersize().
 *
 * Returns the next buffer size to try after `currentsize`: the current
 * size plus one eighth of it plus 6 bytes.  Growing proportionally keeps
 * repeated appends amortized linear, and the small factor limits
 * over-allocation.  The arithmetic is unsigned and may wrap; callers
 * detect that by comparing the result with `currentsize`. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    const size_t eighth = currentsize >> 3;

    return currentsize + eighth + 6;
}
221
222static int
223Util_GrowBuffer(PyObject **buf)
224{
225    size_t size = PyString_GET_SIZE(*buf);
226    size_t new_size = Util_NewBufferSize(size);
227    if (new_size > size) {
228        return _PyString_Resize(buf, new_size);
229    } else {  /* overflow */
230        PyErr_SetString(PyExc_OverflowError,
231                        "Unable to allocate buffer - output too large");
232        return -1;
233    }
234}
235
236/* This is a hacked version of Python's fileobject.c:get_line(). */
237static PyObject *
238Util_GetLine(BZ2FileObject *f, int n)
239{
240    char c;
241    char *buf, *end;
242    size_t total_v_size;        /* total # of slots in buffer */
243    size_t used_v_size;         /* # used slots in buffer */
244    size_t increment;       /* amount to increment the buffer */
245    PyObject *v;
246    int bzerror;
247    int bytes_read;
248    int newlinetypes = f->f_newlinetypes;
249    int skipnextlf = f->f_skipnextlf;
250    int univ_newline = f->f_univ_newline;
251
252    total_v_size = n > 0 ? n : 100;
253    v = PyString_FromStringAndSize((char *)NULL, total_v_size);
254    if (v == NULL)
255        return NULL;
256
257    buf = BUF(v);
258    end = buf + total_v_size;
259
260    for (;;) {
261        Py_BEGIN_ALLOW_THREADS
262        while (buf != end) {
263            bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
264            f->pos++;
265            if (bytes_read == 0) break;
266            if (univ_newline) {
267                if (skipnextlf) {
268                    skipnextlf = 0;
269                    if (c == '\n') {
270                        /* Seeing a \n here with skipnextlf true means we
271                         * saw a \r before.
272                         */
273                        newlinetypes |= NEWLINE_CRLF;
274                        if (bzerror != BZ_OK) break;
275                        bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276                        f->pos++;
277                        if (bytes_read == 0) break;
278                    } else {
279                        newlinetypes |= NEWLINE_CR;
280                    }
281                }
282                if (c == '\r') {
283                    skipnextlf = 1;
284                    c = '\n';
285                } else if (c == '\n')
286                    newlinetypes |= NEWLINE_LF;
287            }
288            *buf++ = c;
289            if (bzerror != BZ_OK || c == '\n') break;
290        }
291        if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
292            newlinetypes |= NEWLINE_CR;
293        Py_END_ALLOW_THREADS
294        f->f_newlinetypes = newlinetypes;
295        f->f_skipnextlf = skipnextlf;
296        if (bzerror == BZ_STREAM_END) {
297            f->size = f->pos;
298            f->mode = MODE_READ_EOF;
299            break;
300        } else if (bzerror != BZ_OK) {
301            Util_CatchBZ2Error(bzerror);
302            Py_DECREF(v);
303            return NULL;
304        }
305        if (c == '\n')
306            break;
307        /* Must be because buf == end */
308        if (n > 0)
309            break;
310        used_v_size = total_v_size;
311        increment = total_v_size >> 2; /* mild exponential growth */
312        total_v_size += increment;
313        if (total_v_size > INT_MAX) {
314            PyErr_SetString(PyExc_OverflowError,
315                "line is longer than a Python string can hold");
316            Py_DECREF(v);
317            return NULL;
318        }
319        if (_PyString_Resize(&v, total_v_size) < 0)
320            return NULL;
321        buf = BUF(v) + used_v_size;
322        end = BUF(v) + total_v_size;
323    }
324
325    used_v_size = buf - BUF(v);
326    if (used_v_size != total_v_size)
327        _PyString_Resize(&v, used_v_size);
328    return v;
329}
330
331/* This is a hacked version of Python's
332 * fileobject.c:Py_UniversalNewlineFread(). */
333size_t
334Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
335                     char* buf, size_t n, BZ2FileObject *f)
336{
337    char *dst = buf;
338    int newlinetypes, skipnextlf;
339
340    assert(buf != NULL);
341    assert(stream != NULL);
342
343    if (!f->f_univ_newline)
344        return BZ2_bzRead(bzerror, stream, buf, n);
345
346    newlinetypes = f->f_newlinetypes;
347    skipnextlf = f->f_skipnextlf;
348
349    /* Invariant:  n is the number of bytes remaining to be filled
350     * in the buffer.
351     */
352    while (n) {
353        size_t nread;
354        int shortread;
355        char *src = dst;
356
357        nread = BZ2_bzRead(bzerror, stream, dst, n);
358        assert(nread <= n);
359        n -= nread; /* assuming 1 byte out for each in; will adjust */
360        shortread = n != 0;             /* true iff EOF or error */
361        while (nread--) {
362            char c = *src++;
363            if (c == '\r') {
364                /* Save as LF and set flag to skip next LF. */
365                *dst++ = '\n';
366                skipnextlf = 1;
367            }
368            else if (skipnextlf && c == '\n') {
369                /* Skip LF, and remember we saw CR LF. */
370                skipnextlf = 0;
371                newlinetypes |= NEWLINE_CRLF;
372                ++n;
373            }
374            else {
375                /* Normal char to be stored in buffer.  Also
376                 * update the newlinetypes flag if either this
377                 * is an LF or the previous char was a CR.
378                 */
379                if (c == '\n')
380                    newlinetypes |= NEWLINE_LF;
381                else if (skipnextlf)
382                    newlinetypes |= NEWLINE_CR;
383                *dst++ = c;
384                skipnextlf = 0;
385            }
386        }
387        if (shortread) {
388            /* If this is EOF, update type flags. */
389            if (skipnextlf && *bzerror == BZ_STREAM_END)
390                newlinetypes |= NEWLINE_CR;
391            break;
392        }
393    }
394    f->f_newlinetypes = newlinetypes;
395    f->f_skipnextlf = skipnextlf;
396    return dst - buf;
397}
398
399/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
400static void
401Util_DropReadAhead(BZ2FileObject *f)
402{
403    if (f->f_buf != NULL) {
404        PyMem_Free(f->f_buf);
405        f->f_buf = NULL;
406    }
407}
408
409/* This is a hacked version of Python's fileobject.c:readahead(). */
410static int
411Util_ReadAhead(BZ2FileObject *f, int bufsize)
412{
413    int chunksize;
414    int bzerror;
415
416    if (f->f_buf != NULL) {
417        if((f->f_bufend - f->f_bufptr) >= 1)
418            return 0;
419        else
420            Util_DropReadAhead(f);
421    }
422    if (f->mode == MODE_READ_EOF) {
423        f->f_bufptr = f->f_buf;
424        f->f_bufend = f->f_buf;
425        return 0;
426    }
427    if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
428        PyErr_NoMemory();
429        return -1;
430    }
431    Py_BEGIN_ALLOW_THREADS
432    chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
433                                     bufsize, f);
434    Py_END_ALLOW_THREADS
435    f->pos += chunksize;
436    if (bzerror == BZ_STREAM_END) {
437        f->size = f->pos;
438        f->mode = MODE_READ_EOF;
439    } else if (bzerror != BZ_OK) {
440        Util_CatchBZ2Error(bzerror);
441        Util_DropReadAhead(f);
442        return -1;
443    }
444    f->f_bufptr = f->f_buf;
445    f->f_bufend = f->f_buf + chunksize;
446    return 0;
447}
448
449/* This is a hacked version of Python's
450 * fileobject.c:readahead_get_line_skip(). */
451static PyStringObject *
452Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
453{
454    PyStringObject* s;
455    char *bufptr;
456    char *buf;
457    int len;
458
459    if (f->f_buf == NULL)
460        if (Util_ReadAhead(f, bufsize) < 0)
461            return NULL;
462
463    len = f->f_bufend - f->f_bufptr;
464    if (len == 0)
465        return (PyStringObject *)
466            PyString_FromStringAndSize(NULL, skip);
467    bufptr = memchr(f->f_bufptr, '\n', len);
468    if (bufptr != NULL) {
469        bufptr++;                               /* Count the '\n' */
470        len = bufptr - f->f_bufptr;
471        s = (PyStringObject *)
472            PyString_FromStringAndSize(NULL, skip+len);
473        if (s == NULL)
474            return NULL;
475        memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
476        f->f_bufptr = bufptr;
477        if (bufptr == f->f_bufend)
478            Util_DropReadAhead(f);
479    } else {
480        bufptr = f->f_bufptr;
481        buf = f->f_buf;
482        f->f_buf = NULL;                /* Force new readahead buffer */
483        s = Util_ReadAheadGetLineSkip(f, skip+len,
484                                      bufsize + (bufsize>>2));
485        if (s == NULL) {
486            PyMem_Free(buf);
487            return NULL;
488        }
489        memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
490        PyMem_Free(buf);
491    }
492    return s;
493}
494
495/* ===================================================================== */
496/* Methods of BZ2File. */
497
498PyDoc_STRVAR(BZ2File_read__doc__,
499"read([size]) -> string\n\
500\n\
501Read at most size uncompressed bytes, returned as a string. If the size\n\
502argument is negative or omitted, read until EOF is reached.\n\
503");
504
505/* This is a hacked version of Python's fileobject.c:file_read(). */
506static PyObject *
507BZ2File_read(BZ2FileObject *self, PyObject *args)
508{
509    long bytesrequested = -1;
510    size_t bytesread, buffersize, chunksize;
511    int bzerror;
512    PyObject *ret = NULL;
513
514    if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
515        return NULL;
516
517    ACQUIRE_LOCK(self);
518    switch (self->mode) {
519        case MODE_READ:
520            break;
521        case MODE_READ_EOF:
522            ret = PyString_FromString("");
523            goto cleanup;
524        case MODE_CLOSED:
525            PyErr_SetString(PyExc_ValueError,
526                            "I/O operation on closed file");
527            goto cleanup;
528        default:
529            PyErr_SetString(PyExc_IOError,
530                            "file is not ready for reading");
531            goto cleanup;
532    }
533
534    /* refuse to mix with f.next() */
535    if (check_iterbuffered(self))
536        goto cleanup;
537
538    if (bytesrequested < 0)
539        buffersize = Util_NewBufferSize((size_t)0);
540    else
541        buffersize = bytesrequested;
542    if (buffersize > INT_MAX) {
543        PyErr_SetString(PyExc_OverflowError,
544                        "requested number of bytes is "
545                        "more than a Python string can hold");
546        goto cleanup;
547    }
548    ret = PyString_FromStringAndSize((char *)NULL, buffersize);
549    if (ret == NULL)
550        goto cleanup;
551    bytesread = 0;
552
553    for (;;) {
554        Py_BEGIN_ALLOW_THREADS
555        chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
556                                         BUF(ret)+bytesread,
557                                         buffersize-bytesread,
558                                         self);
559        self->pos += chunksize;
560        Py_END_ALLOW_THREADS
561        bytesread += chunksize;
562        if (bzerror == BZ_STREAM_END) {
563            self->size = self->pos;
564            self->mode = MODE_READ_EOF;
565            break;
566        } else if (bzerror != BZ_OK) {
567            Util_CatchBZ2Error(bzerror);
568            Py_DECREF(ret);
569            ret = NULL;
570            goto cleanup;
571        }
572        if (bytesrequested < 0) {
573            buffersize = Util_NewBufferSize(buffersize);
574            if (_PyString_Resize(&ret, buffersize) < 0)
575                goto cleanup;
576        } else {
577            break;
578        }
579    }
580    if (bytesread != buffersize)
581        _PyString_Resize(&ret, bytesread);
582
583cleanup:
584    RELEASE_LOCK(self);
585    return ret;
586}
587
588PyDoc_STRVAR(BZ2File_readline__doc__,
589"readline([size]) -> string\n\
590\n\
591Return the next line from the file, as a string, retaining newline.\n\
592A non-negative size argument will limit the maximum number of bytes to\n\
593return (an incomplete line may be returned then). Return an empty\n\
594string at EOF.\n\
595");
596
597static PyObject *
598BZ2File_readline(BZ2FileObject *self, PyObject *args)
599{
600    PyObject *ret = NULL;
601    int sizehint = -1;
602
603    if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
604        return NULL;
605
606    ACQUIRE_LOCK(self);
607    switch (self->mode) {
608        case MODE_READ:
609            break;
610        case MODE_READ_EOF:
611            ret = PyString_FromString("");
612            goto cleanup;
613        case MODE_CLOSED:
614            PyErr_SetString(PyExc_ValueError,
615                            "I/O operation on closed file");
616            goto cleanup;
617        default:
618            PyErr_SetString(PyExc_IOError,
619                            "file is not ready for reading");
620            goto cleanup;
621    }
622
623    /* refuse to mix with f.next() */
624    if (check_iterbuffered(self))
625        goto cleanup;
626
627    if (sizehint == 0)
628        ret = PyString_FromString("");
629    else
630        ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
631
632cleanup:
633    RELEASE_LOCK(self);
634    return ret;
635}
636
637PyDoc_STRVAR(BZ2File_readlines__doc__,
638"readlines([size]) -> list\n\
639\n\
640Call readline() repeatedly and return a list of lines read.\n\
641The optional size argument, if given, is an approximate bound on the\n\
642total number of bytes in the lines returned.\n\
643");
644
645/* This is a hacked version of Python's fileobject.c:file_readlines(). */
646static PyObject *
647BZ2File_readlines(BZ2FileObject *self, PyObject *args)
648{
649    long sizehint = 0;
650    PyObject *list = NULL;
651    PyObject *line;
652    char small_buffer[SMALLCHUNK];
653    char *buffer = small_buffer;
654    size_t buffersize = SMALLCHUNK;
655    PyObject *big_buffer = NULL;
656    size_t nfilled = 0;
657    size_t nread;
658    size_t totalread = 0;
659    char *p, *q, *end;
660    int err;
661    int shortread = 0;
662    int bzerror;
663
664    if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
665        return NULL;
666
667    ACQUIRE_LOCK(self);
668    switch (self->mode) {
669        case MODE_READ:
670            break;
671        case MODE_READ_EOF:
672            list = PyList_New(0);
673            goto cleanup;
674        case MODE_CLOSED:
675            PyErr_SetString(PyExc_ValueError,
676                            "I/O operation on closed file");
677            goto cleanup;
678        default:
679            PyErr_SetString(PyExc_IOError,
680                            "file is not ready for reading");
681            goto cleanup;
682    }
683
684    /* refuse to mix with f.next() */
685    if (check_iterbuffered(self))
686        goto cleanup;
687
688    if ((list = PyList_New(0)) == NULL)
689        goto cleanup;
690
691    for (;;) {
692        Py_BEGIN_ALLOW_THREADS
693        nread = Util_UnivNewlineRead(&bzerror, self->fp,
694                                     buffer+nfilled,
695                                     buffersize-nfilled, self);
696        self->pos += nread;
697        Py_END_ALLOW_THREADS
698        if (bzerror == BZ_STREAM_END) {
699            self->size = self->pos;
700            self->mode = MODE_READ_EOF;
701            if (nread == 0) {
702                sizehint = 0;
703                break;
704            }
705            shortread = 1;
706        } else if (bzerror != BZ_OK) {
707            Util_CatchBZ2Error(bzerror);
708          error:
709            Py_DECREF(list);
710            list = NULL;
711            goto cleanup;
712        }
713        totalread += nread;
714        p = memchr(buffer+nfilled, '\n', nread);
715        if (!shortread && p == NULL) {
716            /* Need a larger buffer to fit this line */
717            nfilled += nread;
718            buffersize *= 2;
719            if (buffersize > INT_MAX) {
720                PyErr_SetString(PyExc_OverflowError,
721                "line is longer than a Python string can hold");
722                goto error;
723            }
724            if (big_buffer == NULL) {
725                /* Create the big buffer */
726                big_buffer = PyString_FromStringAndSize(
727                    NULL, buffersize);
728                if (big_buffer == NULL)
729                    goto error;
730                buffer = PyString_AS_STRING(big_buffer);
731                memcpy(buffer, small_buffer, nfilled);
732            }
733            else {
734                /* Grow the big buffer */
735                if (_PyString_Resize(&big_buffer, buffersize))
736                    goto error;
737                buffer = PyString_AS_STRING(big_buffer);
738            }
739            continue;
740        }
741        end = buffer+nfilled+nread;
742        q = buffer;
743        while (p != NULL) {
744            /* Process complete lines */
745            p++;
746            line = PyString_FromStringAndSize(q, p-q);
747            if (line == NULL)
748                goto error;
749            err = PyList_Append(list, line);
750            Py_DECREF(line);
751            if (err != 0)
752                goto error;
753            q = p;
754            p = memchr(q, '\n', end-q);
755        }
756        /* Move the remaining incomplete line to the start */
757        nfilled = end-q;
758        memmove(buffer, q, nfilled);
759        if (sizehint > 0)
760            if (totalread >= (size_t)sizehint)
761                break;
762        if (shortread) {
763            sizehint = 0;
764            break;
765        }
766    }
767    if (nfilled != 0) {
768        /* Partial last line */
769        line = PyString_FromStringAndSize(buffer, nfilled);
770        if (line == NULL)
771            goto error;
772        if (sizehint > 0) {
773            /* Need to complete the last line */
774            PyObject *rest = Util_GetLine(self, 0);
775            if (rest == NULL) {
776                Py_DECREF(line);
777                goto error;
778            }
779            PyString_Concat(&line, rest);
780            Py_DECREF(rest);
781            if (line == NULL)
782                goto error;
783        }
784        err = PyList_Append(list, line);
785        Py_DECREF(line);
786        if (err != 0)
787            goto error;
788    }
789
790  cleanup:
791    RELEASE_LOCK(self);
792    if (big_buffer) {
793        Py_DECREF(big_buffer);
794    }
795    return list;
796}
797
/* Docstring for BZ2File.xreadlines().  The method itself is not defined
 * next to this string; per the text below it returns the file object. */
PyDoc_STRVAR(BZ2File_xreadlines__doc__,
"xreadlines() -> self\n\
\n\
For backward compatibility. BZ2File objects now include the performance\n\
optimizations previously implemented in the xreadlines module.\n\
");
804
805PyDoc_STRVAR(BZ2File_write__doc__,
806"write(data) -> None\n\
807\n\
808Write the 'data' string to file. Note that due to buffering, close() may\n\
809be needed before the file on disk reflects the data written.\n\
810");
811
812/* This is a hacked version of Python's fileobject.c:file_write(). */
813static PyObject *
814BZ2File_write(BZ2FileObject *self, PyObject *args)
815{
816    PyObject *ret = NULL;
817    Py_buffer pbuf;
818    char *buf;
819    int len;
820    int bzerror;
821
822    if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
823        return NULL;
824    buf = pbuf.buf;
825    len = pbuf.len;
826
827    ACQUIRE_LOCK(self);
828    switch (self->mode) {
829        case MODE_WRITE:
830            break;
831
832        case MODE_CLOSED:
833            PyErr_SetString(PyExc_ValueError,
834                            "I/O operation on closed file");
835            goto cleanup;
836
837        default:
838            PyErr_SetString(PyExc_IOError,
839                            "file is not ready for writing");
840            goto cleanup;
841    }
842
843    self->f_softspace = 0;
844
845    Py_BEGIN_ALLOW_THREADS
846    BZ2_bzWrite (&bzerror, self->fp, buf, len);
847    self->pos += len;
848    Py_END_ALLOW_THREADS
849
850    if (bzerror != BZ_OK) {
851        Util_CatchBZ2Error(bzerror);
852        goto cleanup;
853    }
854
855    Py_INCREF(Py_None);
856    ret = Py_None;
857
858cleanup:
859    PyBuffer_Release(&pbuf);
860    RELEASE_LOCK(self);
861    return ret;
862}
863
864PyDoc_STRVAR(BZ2File_writelines__doc__,
865"writelines(sequence_of_strings) -> None\n\
866\n\
867Write the sequence of strings to the file. Note that newlines are not\n\
868added. The sequence can be any iterable object producing strings. This is\n\
869equivalent to calling write() for each string.\n\
870");
871
872/* This is a hacked version of Python's fileobject.c:file_writelines(). */
873static PyObject *
874BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
875{
876#define CHUNKSIZE 1000
877    PyObject *list = NULL;
878    PyObject *iter = NULL;
879    PyObject *ret = NULL;
880    PyObject *line;
881    int i, j, index, len, islist;
882    int bzerror;
883
884    ACQUIRE_LOCK(self);
885    switch (self->mode) {
886        case MODE_WRITE:
887            break;
888
889        case MODE_CLOSED:
890            PyErr_SetString(PyExc_ValueError,
891                            "I/O operation on closed file");
892            goto error;
893
894        default:
895            PyErr_SetString(PyExc_IOError,
896                            "file is not ready for writing");
897            goto error;
898    }
899
900    islist = PyList_Check(seq);
901    if  (!islist) {
902        iter = PyObject_GetIter(seq);
903        if (iter == NULL) {
904            PyErr_SetString(PyExc_TypeError,
905                "writelines() requires an iterable argument");
906            goto error;
907        }
908        list = PyList_New(CHUNKSIZE);
909        if (list == NULL)
910            goto error;
911    }
912
913    /* Strategy: slurp CHUNKSIZE lines into a private list,
914       checking that they are all strings, then write that list
915       without holding the interpreter lock, then come back for more. */
916    for (index = 0; ; index += CHUNKSIZE) {
917        if (islist) {
918            Py_XDECREF(list);
919            list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
920            if (list == NULL)
921                goto error;
922            j = PyList_GET_SIZE(list);
923        }
924        else {
925            for (j = 0; j < CHUNKSIZE; j++) {
926                line = PyIter_Next(iter);
927                if (line == NULL) {
928                    if (PyErr_Occurred())
929                        goto error;
930                    break;
931                }
932                PyList_SetItem(list, j, line);
933            }
934        }
935        if (j == 0)
936            break;
937
938        /* Check that all entries are indeed strings. If not,
939           apply the same rules as for file.write() and
940           convert the rets to strings. This is slow, but
941           seems to be the only way since all conversion APIs
942           could potentially execute Python code. */
943        for (i = 0; i < j; i++) {
944            PyObject *v = PyList_GET_ITEM(list, i);
945            if (!PyString_Check(v)) {
946                const char *buffer;
947                Py_ssize_t len;
948                if (PyObject_AsCharBuffer(v, &buffer, &len)) {
949                    PyErr_SetString(PyExc_TypeError,
950                                    "writelines() "
951                                    "argument must be "
952                                    "a sequence of "
953                                    "strings");
954                    goto error;
955                }
956                line = PyString_FromStringAndSize(buffer,
957                                                  len);
958                if (line == NULL)
959                    goto error;
960                Py_DECREF(v);
961                PyList_SET_ITEM(list, i, line);
962            }
963        }
964
965        self->f_softspace = 0;
966
967        /* Since we are releasing the global lock, the
968           following code may *not* execute Python code. */
969        Py_BEGIN_ALLOW_THREADS
970        for (i = 0; i < j; i++) {
971            line = PyList_GET_ITEM(list, i);
972            len = PyString_GET_SIZE(line);
973            BZ2_bzWrite (&bzerror, self->fp,
974                         PyString_AS_STRING(line), len);
975            if (bzerror != BZ_OK) {
976                Py_BLOCK_THREADS
977                Util_CatchBZ2Error(bzerror);
978                goto error;
979            }
980        }
981        Py_END_ALLOW_THREADS
982
983        if (j < CHUNKSIZE)
984            break;
985    }
986
987    Py_INCREF(Py_None);
988    ret = Py_None;
989
990  error:
991    RELEASE_LOCK(self);
992    Py_XDECREF(list);
993    Py_XDECREF(iter);
994    return ret;
995#undef CHUNKSIZE
996}
997
/* Docstring for BZ2File.seek(). */
PyDoc_STRVAR(BZ2File_seek__doc__,
"seek(offset [, whence]) -> None\n\
\n\
Move to new file position. Argument offset is a byte count. Optional\n\
argument whence defaults to 0 (offset from start of file, offset\n\
should be >= 0); other values are 1 (move relative to current position,\n\
positive or negative), and 2 (move relative to end of file, usually\n\
negative, although many platforms allow seeking beyond the end of a file).\n\
\n\
Note that seeking of bz2 files is emulated, and depending on the parameters\n\
the operation may be extremely slow.\n\
");
1010
1011static PyObject *
1012BZ2File_seek(BZ2FileObject *self, PyObject *args)
1013{
1014    int where = 0;
1015    PyObject *offobj;
1016    Py_off_t offset;
1017    char small_buffer[SMALLCHUNK];
1018    char *buffer = small_buffer;
1019    size_t buffersize = SMALLCHUNK;
1020    Py_off_t bytesread = 0;
1021    size_t readsize;
1022    int chunksize;
1023    int bzerror;
1024    PyObject *ret = NULL;
1025
1026    if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1027        return NULL;
1028#if !defined(HAVE_LARGEFILE_SUPPORT)
1029    offset = PyInt_AsLong(offobj);
1030#else
1031    offset = PyLong_Check(offobj) ?
1032        PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1033#endif
1034    if (PyErr_Occurred())
1035        return NULL;
1036
1037    ACQUIRE_LOCK(self);
1038    Util_DropReadAhead(self);
1039    switch (self->mode) {
1040        case MODE_READ:
1041        case MODE_READ_EOF:
1042            break;
1043
1044        case MODE_CLOSED:
1045            PyErr_SetString(PyExc_ValueError,
1046                            "I/O operation on closed file");
1047            goto cleanup;
1048
1049        default:
1050            PyErr_SetString(PyExc_IOError,
1051                            "seek works only while reading");
1052            goto cleanup;
1053    }
1054
1055    if (where == 2) {
1056        if (self->size == -1) {
1057            assert(self->mode != MODE_READ_EOF);
1058            for (;;) {
1059                Py_BEGIN_ALLOW_THREADS
1060                chunksize = Util_UnivNewlineRead(
1061                                &bzerror, self->fp,
1062                                buffer, buffersize,
1063                                self);
1064                self->pos += chunksize;
1065                Py_END_ALLOW_THREADS
1066
1067                bytesread += chunksize;
1068                if (bzerror == BZ_STREAM_END) {
1069                    break;
1070                } else if (bzerror != BZ_OK) {
1071                    Util_CatchBZ2Error(bzerror);
1072                    goto cleanup;
1073                }
1074            }
1075            self->mode = MODE_READ_EOF;
1076            self->size = self->pos;
1077            bytesread = 0;
1078        }
1079        offset = self->size + offset;
1080    } else if (where == 1) {
1081        offset = self->pos + offset;
1082    }
1083
1084    /* Before getting here, offset must be the absolute position the file
1085     * pointer should be set to. */
1086
1087    if (offset >= self->pos) {
1088        /* we can move forward */
1089        offset -= self->pos;
1090    } else {
1091        /* we cannot move back, so rewind the stream */
1092        BZ2_bzReadClose(&bzerror, self->fp);
1093        if (self->fp) {
1094            PyFile_DecUseCount((PyFileObject *)self->file);
1095            self->fp = NULL;
1096        }
1097        if (bzerror != BZ_OK) {
1098            Util_CatchBZ2Error(bzerror);
1099            goto cleanup;
1100        }
1101        ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1102        if (!ret)
1103            goto cleanup;
1104        Py_DECREF(ret);
1105        ret = NULL;
1106        self->pos = 0;
1107        self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1108                                  0, 0, NULL, 0);
1109        if (self->fp)
1110            PyFile_IncUseCount((PyFileObject *)self->file);
1111        if (bzerror != BZ_OK) {
1112            Util_CatchBZ2Error(bzerror);
1113            goto cleanup;
1114        }
1115        self->mode = MODE_READ;
1116    }
1117
1118    if (offset <= 0 || self->mode == MODE_READ_EOF)
1119        goto exit;
1120
1121    /* Before getting here, offset must be set to the number of bytes
1122     * to walk forward. */
1123    for (;;) {
1124        if (offset-bytesread > buffersize)
1125            readsize = buffersize;
1126        else
1127            /* offset might be wider that readsize, but the result
1128             * of the subtraction is bound by buffersize (see the
1129             * condition above). buffersize is 8192. */
1130            readsize = (size_t)(offset-bytesread);
1131        Py_BEGIN_ALLOW_THREADS
1132        chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1133                                         buffer, readsize, self);
1134        self->pos += chunksize;
1135        Py_END_ALLOW_THREADS
1136        bytesread += chunksize;
1137        if (bzerror == BZ_STREAM_END) {
1138            self->size = self->pos;
1139            self->mode = MODE_READ_EOF;
1140            break;
1141        } else if (bzerror != BZ_OK) {
1142            Util_CatchBZ2Error(bzerror);
1143            goto cleanup;
1144        }
1145        if (bytesread == offset)
1146            break;
1147    }
1148
1149exit:
1150    Py_INCREF(Py_None);
1151    ret = Py_None;
1152
1153cleanup:
1154    RELEASE_LOCK(self);
1155    return ret;
1156}
1157
1158PyDoc_STRVAR(BZ2File_tell__doc__,
1159"tell() -> int\n\
1160\n\
1161Return the current file position, an integer (may be a long integer).\n\
1162");
1163
1164static PyObject *
1165BZ2File_tell(BZ2FileObject *self, PyObject *args)
1166{
1167    PyObject *ret = NULL;
1168
1169    if (self->mode == MODE_CLOSED) {
1170        PyErr_SetString(PyExc_ValueError,
1171                        "I/O operation on closed file");
1172        goto cleanup;
1173    }
1174
1175#if !defined(HAVE_LARGEFILE_SUPPORT)
1176    ret = PyInt_FromLong(self->pos);
1177#else
1178    ret = PyLong_FromLongLong(self->pos);
1179#endif
1180
1181cleanup:
1182    return ret;
1183}
1184
1185PyDoc_STRVAR(BZ2File_close__doc__,
1186"close() -> None or (perhaps) an integer\n\
1187\n\
1188Close the file. Sets data attribute .closed to true. A closed file\n\
1189cannot be used for further I/O operations. close() may be called more\n\
1190than once without error.\n\
1191");
1192
1193static PyObject *
1194BZ2File_close(BZ2FileObject *self)
1195{
1196    PyObject *ret = NULL;
1197    int bzerror = BZ_OK;
1198
1199    ACQUIRE_LOCK(self);
1200    switch (self->mode) {
1201        case MODE_READ:
1202        case MODE_READ_EOF:
1203            BZ2_bzReadClose(&bzerror, self->fp);
1204            break;
1205        case MODE_WRITE:
1206            BZ2_bzWriteClose(&bzerror, self->fp,
1207                             0, NULL, NULL);
1208            break;
1209    }
1210    if (self->file) {
1211        if (self->fp)
1212            PyFile_DecUseCount((PyFileObject *)self->file);
1213        ret = PyObject_CallMethod(self->file, "close", NULL);
1214    } else {
1215        Py_INCREF(Py_None);
1216        ret = Py_None;
1217    }
1218    self->fp = NULL;
1219    self->mode = MODE_CLOSED;
1220    if (bzerror != BZ_OK) {
1221        Util_CatchBZ2Error(bzerror);
1222        Py_XDECREF(ret);
1223        ret = NULL;
1224    }
1225
1226    RELEASE_LOCK(self);
1227    return ret;
1228}
1229
1230PyDoc_STRVAR(BZ2File_enter_doc,
1231"__enter__() -> self.");
1232
1233static PyObject *
1234BZ2File_enter(BZ2FileObject *self)
1235{
1236    if (self->mode == MODE_CLOSED) {
1237        PyErr_SetString(PyExc_ValueError,
1238            "I/O operation on closed file");
1239        return NULL;
1240    }
1241    Py_INCREF(self);
1242    return (PyObject *) self;
1243}
1244
1245PyDoc_STRVAR(BZ2File_exit_doc,
1246"__exit__(*excinfo) -> None.  Closes the file.");
1247
1248static PyObject *
1249BZ2File_exit(BZ2FileObject *self, PyObject *args)
1250{
1251    PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1252    if (!ret)
1253        /* If error occurred, pass through */
1254        return NULL;
1255    Py_DECREF(ret);
1256    Py_RETURN_NONE;
1257}
1258
1259
1260static PyObject *BZ2File_getiter(BZ2FileObject *self);
1261
1262static PyMethodDef BZ2File_methods[] = {
1263    {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1264    {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1265    {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1266    {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1267    {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1268    {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1269    {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1270    {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1271    {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1272    {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1273    {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1274    {NULL,              NULL}           /* sentinel */
1275};
1276
1277
1278/* ===================================================================== */
1279/* Getters and setters of BZ2File. */
1280
1281/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1282static PyObject *
1283BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1284{
1285    switch (self->f_newlinetypes) {
1286    case NEWLINE_UNKNOWN:
1287        Py_INCREF(Py_None);
1288        return Py_None;
1289    case NEWLINE_CR:
1290        return PyString_FromString("\r");
1291    case NEWLINE_LF:
1292        return PyString_FromString("\n");
1293    case NEWLINE_CR|NEWLINE_LF:
1294        return Py_BuildValue("(ss)", "\r", "\n");
1295    case NEWLINE_CRLF:
1296        return PyString_FromString("\r\n");
1297    case NEWLINE_CR|NEWLINE_CRLF:
1298        return Py_BuildValue("(ss)", "\r", "\r\n");
1299    case NEWLINE_LF|NEWLINE_CRLF:
1300        return Py_BuildValue("(ss)", "\n", "\r\n");
1301    case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1302        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1303    default:
1304        PyErr_Format(PyExc_SystemError,
1305                     "Unknown newlines value 0x%x\n",
1306                     self->f_newlinetypes);
1307        return NULL;
1308    }
1309}
1310
1311static PyObject *
1312BZ2File_get_closed(BZ2FileObject *self, void *closure)
1313{
1314    return PyInt_FromLong(self->mode == MODE_CLOSED);
1315}
1316
1317static PyObject *
1318BZ2File_get_mode(BZ2FileObject *self, void *closure)
1319{
1320    return PyObject_GetAttrString(self->file, "mode");
1321}
1322
1323static PyObject *
1324BZ2File_get_name(BZ2FileObject *self, void *closure)
1325{
1326    return PyObject_GetAttrString(self->file, "name");
1327}
1328
1329static PyGetSetDef BZ2File_getset[] = {
1330    {"closed", (getter)BZ2File_get_closed, NULL,
1331                    "True if the file is closed"},
1332    {"newlines", (getter)BZ2File_get_newlines, NULL,
1333                    "end-of-line convention used in this file"},
1334    {"mode", (getter)BZ2File_get_mode, NULL,
1335                    "file mode ('r', 'w', or 'U')"},
1336    {"name", (getter)BZ2File_get_name, NULL,
1337                    "file name"},
1338    {NULL}      /* Sentinel */
1339};
1340
1341
1342/* ===================================================================== */
1343/* Members of BZ2File_Type. */
1344
1345#undef OFF
1346#define OFF(x) offsetof(BZ2FileObject, x)
1347
1348static PyMemberDef BZ2File_members[] = {
1349    {"softspace",       T_INT,          OFF(f_softspace), 0,
1350     "flag indicating that a space needs to be printed; used by print"},
1351    {NULL}      /* Sentinel */
1352};
1353
1354/* ===================================================================== */
1355/* Slot definitions for BZ2File_Type. */
1356
1357static int
1358BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1359{
1360    static char *kwlist[] = {"filename", "mode", "buffering",
1361                                   "compresslevel", 0};
1362    PyObject *name;
1363    char *mode = "r";
1364    int buffering = -1;
1365    int compresslevel = 9;
1366    int bzerror;
1367    int mode_char = 0;
1368
1369    self->size = -1;
1370
1371    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1372                                     kwlist, &name, &mode, &buffering,
1373                                     &compresslevel))
1374        return -1;
1375
1376    if (compresslevel < 1 || compresslevel > 9) {
1377        PyErr_SetString(PyExc_ValueError,
1378                        "compresslevel must be between 1 and 9");
1379        return -1;
1380    }
1381
1382    for (;;) {
1383        int error = 0;
1384        switch (*mode) {
1385            case 'r':
1386            case 'w':
1387                if (mode_char)
1388                    error = 1;
1389                mode_char = *mode;
1390                break;
1391
1392            case 'b':
1393                break;
1394
1395            case 'U':
1396#ifdef __VMS
1397                self->f_univ_newline = 0;
1398#else
1399                self->f_univ_newline = 1;
1400#endif
1401                break;
1402
1403            default:
1404                error = 1;
1405                break;
1406        }
1407        if (error) {
1408            PyErr_Format(PyExc_ValueError,
1409                         "invalid mode char %c", *mode);
1410            return -1;
1411        }
1412        mode++;
1413        if (*mode == '\0')
1414            break;
1415    }
1416
1417    if (mode_char == 0) {
1418        mode_char = 'r';
1419    }
1420
1421    mode = (mode_char == 'r') ? "rb" : "wb";
1422
1423    self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1424                                       name, mode, buffering);
1425    if (self->file == NULL)
1426        return -1;
1427
1428    /* From now on, we have stuff to dealloc, so jump to error label
1429     * instead of returning */
1430
1431#ifdef WITH_THREAD
1432    self->lock = PyThread_allocate_lock();
1433    if (!self->lock) {
1434        PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1435        goto error;
1436    }
1437#endif
1438
1439    if (mode_char == 'r')
1440        self->fp = BZ2_bzReadOpen(&bzerror,
1441                                  PyFile_AsFile(self->file),
1442                                  0, 0, NULL, 0);
1443    else
1444        self->fp = BZ2_bzWriteOpen(&bzerror,
1445                                   PyFile_AsFile(self->file),
1446                                   compresslevel, 0, 0);
1447
1448    if (bzerror != BZ_OK) {
1449        Util_CatchBZ2Error(bzerror);
1450        goto error;
1451    }
1452    PyFile_IncUseCount((PyFileObject *)self->file);
1453
1454    self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1455
1456    return 0;
1457
1458error:
1459    Py_CLEAR(self->file);
1460#ifdef WITH_THREAD
1461    if (self->lock) {
1462        PyThread_free_lock(self->lock);
1463        self->lock = NULL;
1464    }
1465#endif
1466    return -1;
1467}
1468
1469static void
1470BZ2File_dealloc(BZ2FileObject *self)
1471{
1472    int bzerror;
1473#ifdef WITH_THREAD
1474    if (self->lock)
1475        PyThread_free_lock(self->lock);
1476#endif
1477    switch (self->mode) {
1478        case MODE_READ:
1479        case MODE_READ_EOF:
1480            BZ2_bzReadClose(&bzerror, self->fp);
1481            break;
1482        case MODE_WRITE:
1483            BZ2_bzWriteClose(&bzerror, self->fp,
1484                             0, NULL, NULL);
1485            break;
1486    }
1487    if (self->fp != NULL && self->file != NULL)
1488        PyFile_DecUseCount((PyFileObject *)self->file);
1489    self->fp = NULL;
1490    Util_DropReadAhead(self);
1491    Py_XDECREF(self->file);
1492    Py_TYPE(self)->tp_free((PyObject *)self);
1493}
1494
1495/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1496static PyObject *
1497BZ2File_getiter(BZ2FileObject *self)
1498{
1499    if (self->mode == MODE_CLOSED) {
1500        PyErr_SetString(PyExc_ValueError,
1501                        "I/O operation on closed file");
1502        return NULL;
1503    }
1504    Py_INCREF((PyObject*)self);
1505    return (PyObject *)self;
1506}
1507
1508/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1509#define READAHEAD_BUFSIZE 8192
1510static PyObject *
1511BZ2File_iternext(BZ2FileObject *self)
1512{
1513    PyStringObject* ret;
1514    ACQUIRE_LOCK(self);
1515    if (self->mode == MODE_CLOSED) {
1516        RELEASE_LOCK(self);
1517        PyErr_SetString(PyExc_ValueError,
1518                        "I/O operation on closed file");
1519        return NULL;
1520    }
1521    ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1522    RELEASE_LOCK(self);
1523    if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1524        Py_XDECREF(ret);
1525        return NULL;
1526    }
1527    return (PyObject *)ret;
1528}
1529
1530/* ===================================================================== */
1531/* BZ2File_Type definition. */
1532
1533PyDoc_VAR(BZ2File__doc__) =
1534PyDoc_STR(
1535"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1536\n\
1537Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1538writing. When opened for writing, the file will be created if it doesn't\n\
1539exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1540unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1541is given, must be a number between 1 and 9.\n\
1542")
1543PyDoc_STR(
1544"\n\
1545Add a 'U' to mode to open the file for input with universal newline\n\
1546support. Any line ending in the input file will be seen as a '\\n' in\n\
1547Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1548for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1549'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1550newlines are available only when reading.\n\
1551")
1552;
1553
1554static PyTypeObject BZ2File_Type = {
1555    PyVarObject_HEAD_INIT(NULL, 0)
1556    "bz2.BZ2File",              /*tp_name*/
1557    sizeof(BZ2FileObject),      /*tp_basicsize*/
1558    0,                          /*tp_itemsize*/
1559    (destructor)BZ2File_dealloc, /*tp_dealloc*/
1560    0,                          /*tp_print*/
1561    0,                          /*tp_getattr*/
1562    0,                          /*tp_setattr*/
1563    0,                          /*tp_compare*/
1564    0,                          /*tp_repr*/
1565    0,                          /*tp_as_number*/
1566    0,                          /*tp_as_sequence*/
1567    0,                          /*tp_as_mapping*/
1568    0,                          /*tp_hash*/
1569    0,                      /*tp_call*/
1570    0,                      /*tp_str*/
1571    PyObject_GenericGetAttr,/*tp_getattro*/
1572    PyObject_GenericSetAttr,/*tp_setattro*/
1573    0,                      /*tp_as_buffer*/
1574    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1575    BZ2File__doc__,         /*tp_doc*/
1576    0,                      /*tp_traverse*/
1577    0,                      /*tp_clear*/
1578    0,                      /*tp_richcompare*/
1579    0,                      /*tp_weaklistoffset*/
1580    (getiterfunc)BZ2File_getiter, /*tp_iter*/
1581    (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1582    BZ2File_methods,        /*tp_methods*/
1583    BZ2File_members,        /*tp_members*/
1584    BZ2File_getset,         /*tp_getset*/
1585    0,                      /*tp_base*/
1586    0,                      /*tp_dict*/
1587    0,                      /*tp_descr_get*/
1588    0,                      /*tp_descr_set*/
1589    0,                      /*tp_dictoffset*/
1590    (initproc)BZ2File_init, /*tp_init*/
1591    PyType_GenericAlloc,    /*tp_alloc*/
1592    PyType_GenericNew,      /*tp_new*/
1593    _PyObject_Del,          /*tp_free*/
1594    0,                      /*tp_is_gc*/
1595};
1596
1597
1598/* ===================================================================== */
1599/* Methods of BZ2Comp. */
1600
1601PyDoc_STRVAR(BZ2Comp_compress__doc__,
1602"compress(data) -> string\n\
1603\n\
1604Provide more data to the compressor object. It will return chunks of\n\
1605compressed data whenever possible. When you've finished providing data\n\
1606to compress, call the flush() method to finish the compression process,\n\
1607and return what is left in the internal buffers.\n\
1608");
1609
1610static PyObject *
1611BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1612{
1613    Py_buffer pdata;
1614    size_t input_left;
1615    size_t output_size = 0;
1616    PyObject *ret = NULL;
1617    bz_stream *bzs = &self->bzs;
1618    int bzerror;
1619
1620    if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1621        return NULL;
1622
1623    if (pdata.len == 0) {
1624        PyBuffer_Release(&pdata);
1625        return PyString_FromString("");
1626    }
1627
1628    ACQUIRE_LOCK(self);
1629    if (!self->running) {
1630        PyErr_SetString(PyExc_ValueError,
1631                        "this object was already flushed");
1632        goto error;
1633    }
1634
1635    ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1636    if (!ret)
1637        goto error;
1638
1639    bzs->next_in = pdata.buf;
1640    bzs->avail_in = MIN(pdata.len, UINT_MAX);
1641    input_left = pdata.len - bzs->avail_in;
1642
1643    bzs->next_out = BUF(ret);
1644    bzs->avail_out = PyString_GET_SIZE(ret);
1645
1646    for (;;) {
1647        char *saved_next_out;
1648
1649        Py_BEGIN_ALLOW_THREADS
1650        saved_next_out = bzs->next_out;
1651        bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1652        output_size += bzs->next_out - saved_next_out;
1653        Py_END_ALLOW_THREADS
1654
1655        if (bzerror != BZ_RUN_OK) {
1656            Util_CatchBZ2Error(bzerror);
1657            goto error;
1658        }
1659        if (bzs->avail_in == 0) {
1660            if (input_left == 0)
1661                break; /* no more input data */
1662            bzs->avail_in = MIN(input_left, UINT_MAX);
1663            input_left -= bzs->avail_in;
1664        }
1665        if (bzs->avail_out == 0) {
1666            size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1667            if (buffer_left == 0) {
1668                if (Util_GrowBuffer(&ret) < 0) {
1669                    BZ2_bzCompressEnd(bzs);
1670                    goto error;
1671                }
1672                bzs->next_out = BUF(ret) + output_size;
1673                buffer_left = PyString_GET_SIZE(ret) - output_size;
1674            }
1675            bzs->avail_out = MIN(buffer_left, UINT_MAX);
1676        }
1677    }
1678
1679    if (_PyString_Resize(&ret, output_size) < 0)
1680        goto error;
1681
1682    RELEASE_LOCK(self);
1683    PyBuffer_Release(&pdata);
1684    return ret;
1685
1686error:
1687    RELEASE_LOCK(self);
1688    PyBuffer_Release(&pdata);
1689    Py_XDECREF(ret);
1690    return NULL;
1691}
1692
1693PyDoc_STRVAR(BZ2Comp_flush__doc__,
1694"flush() -> string\n\
1695\n\
1696Finish the compression process and return what is left in internal buffers.\n\
1697You must not use the compressor object after calling this method.\n\
1698");
1699
1700static PyObject *
1701BZ2Comp_flush(BZ2CompObject *self)
1702{
1703    size_t output_size = 0;
1704    PyObject *ret = NULL;
1705    bz_stream *bzs = &self->bzs;
1706    int bzerror;
1707
1708    ACQUIRE_LOCK(self);
1709    if (!self->running) {
1710        PyErr_SetString(PyExc_ValueError, "object was already flushed");
1711        goto error;
1712    }
1713    self->running = 0;
1714
1715    ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1716    if (!ret)
1717        goto error;
1718
1719    bzs->next_out = BUF(ret);
1720    bzs->avail_out = PyString_GET_SIZE(ret);
1721
1722    for (;;) {
1723        char *saved_next_out;
1724
1725        Py_BEGIN_ALLOW_THREADS
1726        saved_next_out = bzs->next_out;
1727        bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1728        output_size += bzs->next_out - saved_next_out;
1729        Py_END_ALLOW_THREADS
1730
1731        if (bzerror == BZ_STREAM_END) {
1732            break;
1733        } else if (bzerror != BZ_FINISH_OK) {
1734            Util_CatchBZ2Error(bzerror);
1735            goto error;
1736        }
1737        if (bzs->avail_out == 0) {
1738            size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1739            if (buffer_left == 0) {
1740                if (Util_GrowBuffer(&ret) < 0)
1741                    goto error;
1742                bzs->next_out = BUF(ret) + output_size;
1743                buffer_left = PyString_GET_SIZE(ret) - output_size;
1744            }
1745            bzs->avail_out = MIN(buffer_left, UINT_MAX);
1746        }
1747    }
1748
1749    if (output_size != PyString_GET_SIZE(ret))
1750        if (_PyString_Resize(&ret, output_size) < 0)
1751            goto error;
1752
1753    RELEASE_LOCK(self);
1754    return ret;
1755
1756error:
1757    RELEASE_LOCK(self);
1758    Py_XDECREF(ret);
1759    return NULL;
1760}
1761
1762static PyMethodDef BZ2Comp_methods[] = {
1763    {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1764     BZ2Comp_compress__doc__},
1765    {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1766     BZ2Comp_flush__doc__},
1767    {NULL,              NULL}           /* sentinel */
1768};
1769
1770
1771/* ===================================================================== */
1772/* Slot definitions for BZ2Comp_Type. */
1773
1774static int
1775BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1776{
1777    int compresslevel = 9;
1778    int bzerror;
1779    static char *kwlist[] = {"compresslevel", 0};
1780
1781    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1782                                     kwlist, &compresslevel))
1783        return -1;
1784
1785    if (compresslevel < 1 || compresslevel > 9) {
1786        PyErr_SetString(PyExc_ValueError,
1787                        "compresslevel must be between 1 and 9");
1788        goto error;
1789    }
1790
1791#ifdef WITH_THREAD
1792    self->lock = PyThread_allocate_lock();
1793    if (!self->lock) {
1794        PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1795        goto error;
1796    }
1797#endif
1798
1799    memset(&self->bzs, 0, sizeof(bz_stream));
1800    bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1801    if (bzerror != BZ_OK) {
1802        Util_CatchBZ2Error(bzerror);
1803        goto error;
1804    }
1805
1806    self->running = 1;
1807
1808    return 0;
1809error:
1810#ifdef WITH_THREAD
1811    if (self->lock) {
1812        PyThread_free_lock(self->lock);
1813        self->lock = NULL;
1814    }
1815#endif
1816    return -1;
1817}
1818
1819static void
1820BZ2Comp_dealloc(BZ2CompObject *self)
1821{
1822#ifdef WITH_THREAD
1823    if (self->lock)
1824        PyThread_free_lock(self->lock);
1825#endif
1826    BZ2_bzCompressEnd(&self->bzs);
1827    Py_TYPE(self)->tp_free((PyObject *)self);
1828}
1829
1830
1831/* ===================================================================== */
1832/* BZ2Comp_Type definition. */
1833
1834PyDoc_STRVAR(BZ2Comp__doc__,
1835"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1836\n\
1837Create a new compressor object. This object may be used to compress\n\
1838data sequentially. If you want to compress data in one shot, use the\n\
1839compress() function instead. The compresslevel parameter, if given,\n\
1840must be a number between 1 and 9.\n\
1841");
1842
1843static PyTypeObject BZ2Comp_Type = {
1844    PyVarObject_HEAD_INIT(NULL, 0)
1845    "bz2.BZ2Compressor",        /*tp_name*/
1846    sizeof(BZ2CompObject),      /*tp_basicsize*/
1847    0,                          /*tp_itemsize*/
1848    (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1849    0,                          /*tp_print*/
1850    0,                          /*tp_getattr*/
1851    0,                          /*tp_setattr*/
1852    0,                          /*tp_compare*/
1853    0,                          /*tp_repr*/
1854    0,                          /*tp_as_number*/
1855    0,                          /*tp_as_sequence*/
1856    0,                          /*tp_as_mapping*/
1857    0,                          /*tp_hash*/
1858    0,                      /*tp_call*/
1859    0,                      /*tp_str*/
1860    PyObject_GenericGetAttr,/*tp_getattro*/
1861    PyObject_GenericSetAttr,/*tp_setattro*/
1862    0,                      /*tp_as_buffer*/
1863    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1864    BZ2Comp__doc__,         /*tp_doc*/
1865    0,                      /*tp_traverse*/
1866    0,                      /*tp_clear*/
1867    0,                      /*tp_richcompare*/
1868    0,                      /*tp_weaklistoffset*/
1869    0,                      /*tp_iter*/
1870    0,                      /*tp_iternext*/
1871    BZ2Comp_methods,        /*tp_methods*/
1872    0,                      /*tp_members*/
1873    0,                      /*tp_getset*/
1874    0,                      /*tp_base*/
1875    0,                      /*tp_dict*/
1876    0,                      /*tp_descr_get*/
1877    0,                      /*tp_descr_set*/
1878    0,                      /*tp_dictoffset*/
1879    (initproc)BZ2Comp_init, /*tp_init*/
1880    PyType_GenericAlloc,    /*tp_alloc*/
1881    PyType_GenericNew,      /*tp_new*/
1882    _PyObject_Del,          /*tp_free*/
1883    0,                      /*tp_is_gc*/
1884};
1885
1886
1887/* ===================================================================== */
1888/* Members of BZ2Decomp. */
1889
1890#undef OFF
1891#define OFF(x) offsetof(BZ2DecompObject, x)
1892
1893static PyMemberDef BZ2Decomp_members[] = {
1894    {"unused_data", T_OBJECT, OFF(unused_data), RO},
1895    {NULL}      /* Sentinel */
1896};
1897
1898
1899/* ===================================================================== */
1900/* Methods of BZ2Decomp. */
1901
1902PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1903"decompress(data) -> string\n\
1904\n\
1905Provide more data to the decompressor object. It will return chunks\n\
1906of decompressed data whenever possible. If you try to decompress data\n\
1907after the end of stream is found, EOFError will be raised. If any data\n\
1908was found after the end of stream, it'll be ignored and saved in\n\
1909unused_data attribute.\n\
1910");
1911
1912static PyObject *
1913BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1914{
1915    Py_buffer pdata;
1916    size_t input_left;
1917    size_t output_size = 0;
1918    PyObject *ret = NULL;
1919    bz_stream *bzs = &self->bzs;
1920    int bzerror;
1921
1922    if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1923        return NULL;
1924
1925    ACQUIRE_LOCK(self);
1926    if (!self->running) {
1927        PyErr_SetString(PyExc_EOFError, "end of stream was "
1928                                        "already found");
1929        goto error;
1930    }
1931
1932    ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1933    if (!ret)
1934        goto error;
1935
1936    bzs->next_in = pdata.buf;
1937    bzs->avail_in = MIN(pdata.len, UINT_MAX);
1938    input_left = pdata.len - bzs->avail_in;
1939
1940    bzs->next_out = BUF(ret);
1941    bzs->avail_out = PyString_GET_SIZE(ret);
1942
1943    for (;;) {
1944        char *saved_next_out;
1945
1946        Py_BEGIN_ALLOW_THREADS
1947        saved_next_out = bzs->next_out;
1948        bzerror = BZ2_bzDecompress(bzs);
1949        output_size += bzs->next_out - saved_next_out;
1950        Py_END_ALLOW_THREADS
1951
1952        if (bzerror == BZ_STREAM_END) {
1953            self->running = 0;
1954            input_left += bzs->avail_in;
1955            if (input_left != 0) {
1956                Py_SETREF(self->unused_data,
1957                          PyString_FromStringAndSize(bzs->next_in, input_left));
1958                if (self->unused_data == NULL)
1959                    goto error;
1960            }
1961            break;
1962        }
1963        if (bzerror != BZ_OK) {
1964            Util_CatchBZ2Error(bzerror);
1965            goto error;
1966        }
1967        if (bzs->avail_in == 0) {
1968            if (input_left == 0)
1969                break; /* no more input data */
1970            bzs->avail_in = MIN(input_left, UINT_MAX);
1971            input_left -= bzs->avail_in;
1972        }
1973        if (bzs->avail_out == 0) {
1974            size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1975            if (buffer_left == 0) {
1976                if (Util_GrowBuffer(&ret) < 0) {
1977                    BZ2_bzDecompressEnd(bzs);
1978                    goto error;
1979                }
1980                bzs->next_out = BUF(ret) + output_size;
1981                buffer_left = PyString_GET_SIZE(ret) - output_size;
1982            }
1983            bzs->avail_out = MIN(buffer_left, UINT_MAX);
1984        }
1985    }
1986
1987    if (output_size != PyString_GET_SIZE(ret))
1988        if (_PyString_Resize(&ret, output_size) < 0)
1989            goto error;
1990
1991    RELEASE_LOCK(self);
1992    PyBuffer_Release(&pdata);
1993    return ret;
1994
1995error:
1996    RELEASE_LOCK(self);
1997    PyBuffer_Release(&pdata);
1998    Py_XDECREF(ret);
1999    return NULL;
2000}
2001
2002static PyMethodDef BZ2Decomp_methods[] = {
2003    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
2004    {NULL,              NULL}           /* sentinel */
2005};
2006
2007
2008/* ===================================================================== */
2009/* Slot definitions for BZ2Decomp_Type. */
2010
2011static int
2012BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2013{
2014    int bzerror;
2015
2016    if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2017        return -1;
2018
2019#ifdef WITH_THREAD
2020    self->lock = PyThread_allocate_lock();
2021    if (!self->lock) {
2022        PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2023        goto error;
2024    }
2025#endif
2026
2027    self->unused_data = PyString_FromString("");
2028    if (!self->unused_data)
2029        goto error;
2030
2031    memset(&self->bzs, 0, sizeof(bz_stream));
2032    bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2033    if (bzerror != BZ_OK) {
2034        Util_CatchBZ2Error(bzerror);
2035        goto error;
2036    }
2037
2038    self->running = 1;
2039
2040    return 0;
2041
2042error:
2043#ifdef WITH_THREAD
2044    if (self->lock) {
2045        PyThread_free_lock(self->lock);
2046        self->lock = NULL;
2047    }
2048#endif
2049    Py_CLEAR(self->unused_data);
2050    return -1;
2051}
2052
2053static void
2054BZ2Decomp_dealloc(BZ2DecompObject *self)
2055{
2056#ifdef WITH_THREAD
2057    if (self->lock)
2058        PyThread_free_lock(self->lock);
2059#endif
2060    Py_XDECREF(self->unused_data);
2061    BZ2_bzDecompressEnd(&self->bzs);
2062    Py_TYPE(self)->tp_free((PyObject *)self);
2063}
2064
2065
2066/* ===================================================================== */
2067/* BZ2Decomp_Type definition. */
2068
2069PyDoc_STRVAR(BZ2Decomp__doc__,
2070"BZ2Decompressor() -> decompressor object\n\
2071\n\
2072Create a new decompressor object. This object may be used to decompress\n\
2073data sequentially. If you want to decompress data in one shot, use the\n\
2074decompress() function instead.\n\
2075");
2076
2077static PyTypeObject BZ2Decomp_Type = {
2078    PyVarObject_HEAD_INIT(NULL, 0)
2079    "bz2.BZ2Decompressor",      /*tp_name*/
2080    sizeof(BZ2DecompObject), /*tp_basicsize*/
2081    0,                          /*tp_itemsize*/
2082    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2083    0,                          /*tp_print*/
2084    0,                          /*tp_getattr*/
2085    0,                          /*tp_setattr*/
2086    0,                          /*tp_compare*/
2087    0,                          /*tp_repr*/
2088    0,                          /*tp_as_number*/
2089    0,                          /*tp_as_sequence*/
2090    0,                          /*tp_as_mapping*/
2091    0,                          /*tp_hash*/
2092    0,                      /*tp_call*/
2093    0,                      /*tp_str*/
2094    PyObject_GenericGetAttr,/*tp_getattro*/
2095    PyObject_GenericSetAttr,/*tp_setattro*/
2096    0,                      /*tp_as_buffer*/
2097    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2098    BZ2Decomp__doc__,       /*tp_doc*/
2099    0,                      /*tp_traverse*/
2100    0,                      /*tp_clear*/
2101    0,                      /*tp_richcompare*/
2102    0,                      /*tp_weaklistoffset*/
2103    0,                      /*tp_iter*/
2104    0,                      /*tp_iternext*/
2105    BZ2Decomp_methods,      /*tp_methods*/
2106    BZ2Decomp_members,      /*tp_members*/
2107    0,                      /*tp_getset*/
2108    0,                      /*tp_base*/
2109    0,                      /*tp_dict*/
2110    0,                      /*tp_descr_get*/
2111    0,                      /*tp_descr_set*/
2112    0,                      /*tp_dictoffset*/
2113    (initproc)BZ2Decomp_init, /*tp_init*/
2114    PyType_GenericAlloc,    /*tp_alloc*/
2115    PyType_GenericNew,      /*tp_new*/
2116    _PyObject_Del,          /*tp_free*/
2117    0,                      /*tp_is_gc*/
2118};
2119
2120
2121/* ===================================================================== */
2122/* Module functions. */
2123
2124PyDoc_STRVAR(bz2_compress__doc__,
2125"compress(data [, compresslevel=9]) -> string\n\
2126\n\
2127Compress data in one shot. If you want to compress data sequentially,\n\
2128use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2129given, must be a number between 1 and 9.\n\
2130");
2131
2132static PyObject *
2133bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2134{
2135    int compresslevel=9;
2136    int action;
2137    Py_buffer pdata;
2138    size_t input_left;
2139    size_t output_size = 0;
2140    PyObject *ret = NULL;
2141    bz_stream _bzs;
2142    bz_stream *bzs = &_bzs;
2143    int bzerror;
2144    static char *kwlist[] = {"data", "compresslevel", 0};
2145
2146    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2147                                     kwlist, &pdata,
2148                                     &compresslevel))
2149        return NULL;
2150
2151    if (compresslevel < 1 || compresslevel > 9) {
2152        PyErr_SetString(PyExc_ValueError,
2153                        "compresslevel must be between 1 and 9");
2154        PyBuffer_Release(&pdata);
2155        return NULL;
2156    }
2157
2158    ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2159    if (!ret) {
2160        PyBuffer_Release(&pdata);
2161        return NULL;
2162    }
2163
2164    memset(bzs, 0, sizeof(bz_stream));
2165
2166    bzs->next_in = pdata.buf;
2167    bzs->avail_in = MIN(pdata.len, UINT_MAX);
2168    input_left = pdata.len - bzs->avail_in;
2169
2170    bzs->next_out = BUF(ret);
2171    bzs->avail_out = PyString_GET_SIZE(ret);
2172
2173    bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2174    if (bzerror != BZ_OK) {
2175        Util_CatchBZ2Error(bzerror);
2176        PyBuffer_Release(&pdata);
2177        Py_DECREF(ret);
2178        return NULL;
2179    }
2180
2181    action = input_left > 0 ? BZ_RUN : BZ_FINISH;
2182
2183    for (;;) {
2184        char *saved_next_out;
2185
2186        Py_BEGIN_ALLOW_THREADS
2187        saved_next_out = bzs->next_out;
2188        bzerror = BZ2_bzCompress(bzs, action);
2189        output_size += bzs->next_out - saved_next_out;
2190        Py_END_ALLOW_THREADS
2191
2192        if (bzerror == BZ_STREAM_END) {
2193            break;
2194        } else if (bzerror != BZ_RUN_OK && bzerror != BZ_FINISH_OK) {
2195            BZ2_bzCompressEnd(bzs);
2196            Util_CatchBZ2Error(bzerror);
2197            PyBuffer_Release(&pdata);
2198            Py_DECREF(ret);
2199            return NULL;
2200        }
2201        if (action == BZ_RUN && bzs->avail_in == 0) {
2202            if (input_left == 0) {
2203                action = BZ_FINISH;
2204            } else {
2205                bzs->avail_in = MIN(input_left, UINT_MAX);
2206                input_left -= bzs->avail_in;
2207            }
2208        }
2209        if (bzs->avail_out == 0) {
2210            size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2211            if (buffer_left == 0) {
2212                if (Util_GrowBuffer(&ret) < 0) {
2213                    BZ2_bzCompressEnd(bzs);
2214                    PyBuffer_Release(&pdata);
2215                    return NULL;
2216                }
2217                bzs->next_out = BUF(ret) + output_size;
2218                buffer_left = PyString_GET_SIZE(ret) - output_size;
2219            }
2220            bzs->avail_out = MIN(buffer_left, UINT_MAX);
2221        }
2222    }
2223
2224    if (output_size != PyString_GET_SIZE(ret))
2225        _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */
2226
2227    BZ2_bzCompressEnd(bzs);
2228    PyBuffer_Release(&pdata);
2229    return ret;
2230}
2231
2232PyDoc_STRVAR(bz2_decompress__doc__,
2233"decompress(data) -> decompressed data\n\
2234\n\
2235Decompress data in one shot. If you want to decompress data sequentially,\n\
2236use an instance of BZ2Decompressor instead.\n\
2237");
2238
2239static PyObject *
2240bz2_decompress(PyObject *self, PyObject *args)
2241{
2242    Py_buffer pdata;
2243    size_t input_left;
2244    size_t output_size = 0;
2245    PyObject *ret;
2246    bz_stream _bzs;
2247    bz_stream *bzs = &_bzs;
2248    int bzerror;
2249
2250    if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2251        return NULL;
2252
2253    if (pdata.len == 0) {
2254        PyBuffer_Release(&pdata);
2255        return PyString_FromString("");
2256    }
2257
2258    ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2259    if (!ret) {
2260        PyBuffer_Release(&pdata);
2261        return NULL;
2262    }
2263
2264    memset(bzs, 0, sizeof(bz_stream));
2265
2266    bzs->next_in = pdata.buf;
2267    bzs->avail_in = MIN(pdata.len, UINT_MAX);
2268    input_left = pdata.len - bzs->avail_in;
2269
2270    bzs->next_out = BUF(ret);
2271    bzs->avail_out = PyString_GET_SIZE(ret);
2272
2273    bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2274    if (bzerror != BZ_OK) {
2275        Util_CatchBZ2Error(bzerror);
2276        Py_DECREF(ret);
2277        PyBuffer_Release(&pdata);
2278        return NULL;
2279    }
2280
2281    for (;;) {
2282        char *saved_next_out;
2283
2284        Py_BEGIN_ALLOW_THREADS
2285        saved_next_out = bzs->next_out;
2286        bzerror = BZ2_bzDecompress(bzs);
2287        output_size += bzs->next_out - saved_next_out;
2288        Py_END_ALLOW_THREADS
2289
2290        if (bzerror == BZ_STREAM_END) {
2291            break;
2292        } else if (bzerror != BZ_OK) {
2293            BZ2_bzDecompressEnd(bzs);
2294            Util_CatchBZ2Error(bzerror);
2295            PyBuffer_Release(&pdata);
2296            Py_DECREF(ret);
2297            return NULL;
2298        }
2299        if (bzs->avail_in == 0) {
2300            if (input_left == 0) {
2301                BZ2_bzDecompressEnd(bzs);
2302                PyErr_SetString(PyExc_ValueError,
2303                                "couldn't find end of stream");
2304                PyBuffer_Release(&pdata);
2305                Py_DECREF(ret);
2306                return NULL;
2307            }
2308            bzs->avail_in = MIN(input_left, UINT_MAX);
2309            input_left -= bzs->avail_in;
2310        }
2311        if (bzs->avail_out == 0) {
2312            size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2313            if (buffer_left == 0) {
2314                if (Util_GrowBuffer(&ret) < 0) {
2315                    BZ2_bzDecompressEnd(bzs);
2316                    PyBuffer_Release(&pdata);
2317                    return NULL;
2318                }
2319                bzs->next_out = BUF(ret) + output_size;
2320                buffer_left = PyString_GET_SIZE(ret) - output_size;
2321            }
2322            bzs->avail_out = MIN(buffer_left, UINT_MAX);
2323        }
2324    }
2325
2326    if (output_size != PyString_GET_SIZE(ret))
2327        _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */
2328
2329    BZ2_bzDecompressEnd(bzs);
2330    PyBuffer_Release(&pdata);
2331    return ret;
2332}
2333
2334static PyMethodDef bz2_methods[] = {
2335    {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2336        bz2_compress__doc__},
2337    {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2338        bz2_decompress__doc__},
2339    {NULL,              NULL}           /* sentinel */
2340};
2341
2342/* ===================================================================== */
2343/* Initialization function. */
2344
2345PyDoc_STRVAR(bz2__doc__,
2346"The python bz2 module provides a comprehensive interface for\n\
2347the bz2 compression library. It implements a complete file\n\
2348interface, one shot (de)compression functions, and types for\n\
2349sequential (de)compression.\n\
2350");
2351
2352PyMODINIT_FUNC
2353initbz2(void)
2354{
2355    PyObject *m;
2356
2357    if (PyType_Ready(&BZ2File_Type) < 0)
2358        return;
2359    if (PyType_Ready(&BZ2Comp_Type) < 0)
2360        return;
2361    if (PyType_Ready(&BZ2Decomp_Type) < 0)
2362        return;
2363
2364    m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2365    if (m == NULL)
2366        return;
2367
2368    PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2369
2370    Py_INCREF(&BZ2File_Type);
2371    PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2372
2373    Py_INCREF(&BZ2Comp_Type);
2374    PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2375
2376    Py_INCREF(&BZ2Decomp_Type);
2377    PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2378}
2379