bz2module.c revision 2c7d6859a42634921b2bb2447dfa890633db4d05
1/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002  Python Software Foundation; All Rights Reserved
7
8*/
9
10#include "Python.h"
11#include <stdio.h>
12#include <bzlib.h>
13#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for this module. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
    Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/* Our very own off_t-like type, 64-bit if possible.
 * Copied from Objects/fileobject.c.
 *
 * Without HAVE_LARGEFILE_SUPPORT plain off_t is used as-is.  With it,
 * off_t is chosen when it is at least 8 bytes wide, otherwise fpos_t
 * when that is at least 8 bytes wide; if neither qualifies the build
 * is stopped with an #error. */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character buffer of a Python string object.  The argument is
 * parenthesized so that any expression can be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0         /* I/O is refused with ValueError */
#define MODE_READ     1         /* Open for reading, more data may follow */
#define MODE_READ_EOF 2         /* Reading hit BZ_STREAM_END */
#define MODE_WRITE    3         /* Open for writing */

/* Exact type test: true only when the object's type is BZ2File_Type
 * itself. */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
45
46
/* Compatibility layer over two flavours of the bzlib header.
 *
 * When the header defines BZ_CONFIG_ERROR the output counter is split
 * into total_out_hi32/total_out_lo32, and BZS_TOTAL_OUT() recombines
 * them into the widest integer available (falling back to the low word
 * alone).  When it does not (presumably an older bzlib release), the
 * BZ2_-prefixed names used throughout this file are mapped onto the
 * unprefixed functions and the single total_out counter is read.
 *
 * The macro argument is parenthesized so that expressions such as
 * BZS_TOTAL_OUT(&stream) expand correctly. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers; both expand to nothing in builds without
 * WITH_THREAD.
 *
 * ACQUIRE_LOCK first tries to take obj->lock without waiting.  Only
 * when that fails does it wait for the lock, and it does so inside a
 * Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS section so other Python
 * threads can run meanwhile.  The argument is parenthesized so any
 * pointer expression may be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bits in f_newlinetypes.  The universal-newline readers OR these into
 * the field as each line-ending convention is encountered. */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
98
99/* ===================================================================== */
100/* Structure definitions. */
101
/* Per-instance state of a BZ2File object. */
typedef struct {
    PyObject_HEAD
    PyObject *file;             /* NOTE(review): presumably the underlying
                                   Python file object; not touched by the
                                   code visible here -- confirm */

    char* f_buf;                /* Allocated readahead buffer */
    char* f_bufend;             /* Points after last occupied position */
    char* f_bufptr;             /* Current buffer position */

    int f_softspace;            /* Flag used by 'print' command */

    int f_univ_newline;         /* Handle any newline convention */
    int f_newlinetypes;         /* Types of newlines seen */
    int f_skipnextlf;           /* Skip next \n */

    BZFILE *fp;                 /* bzlib handle given to BZ2_bzRead/BZ2_bzWrite */
    int mode;                   /* One of the MODE_* constants */
    Py_off_t pos;               /* Advanced by the read and write paths as
                                   data is transferred */
    Py_off_t size;              /* Set to pos when BZ_STREAM_END is seen;
                                   seek() compares it against -1 */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Taken via ACQUIRE_LOCK by the methods */
#endif
} BZ2FileObject;
124
/* Object state built around one bzlib stream.
 * NOTE(review): presumably backs the module's compressor type; none of
 * its users are visible in this part of the file -- confirm. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;              /* bzlib stream structure */
    int running;                /* NOTE(review): looks like an "in use"
                                   flag -- confirm against the methods */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Field the ACQUIRE_LOCK/RELEASE_LOCK
                                   macros operate on */
#endif
} BZ2CompObject;
133
/* Object state built around one bzlib stream plus a Python object.
 * NOTE(review): presumably backs the module's decompressor type; none of
 * its users are visible in this part of the file -- confirm. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;              /* bzlib stream structure */
    int running;                /* NOTE(review): looks like an "in use"
                                   flag -- confirm against the methods */
    PyObject *unused_data;      /* NOTE(review): presumably input left over
                                   after the end of the stream -- confirm */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* Field the ACQUIRE_LOCK/RELEASE_LOCK
                                   macros operate on */
#endif
} BZ2DecompObject;
143
144/* ===================================================================== */
145/* Utility functions. */
146
147/* Refuse regular I/O if there's data in the iteration-buffer.
148 * Mixing them would cause data to arrive out of order, as the read*
149 * methods don't use the iteration buffer. */
150static int
151check_iterbuffered(BZ2FileObject *f)
152{
153    if (f->f_buf != NULL &&
154        (f->f_bufend - f->f_bufptr) > 0 &&
155        f->f_buf[0] != '\0') {
156        PyErr_SetString(PyExc_ValueError,
157            "Mixing iteration and read methods would lose data");
158        return -1;
159    }
160    return 0;
161}
162
163static int
164Util_CatchBZ2Error(int bzerror)
165{
166    int ret = 0;
167    switch(bzerror) {
168        case BZ_OK:
169        case BZ_STREAM_END:
170            break;
171
172#ifdef BZ_CONFIG_ERROR
173        case BZ_CONFIG_ERROR:
174            PyErr_SetString(PyExc_SystemError,
175                            "the bz2 library was not compiled "
176                            "correctly");
177            ret = 1;
178            break;
179#endif
180
181        case BZ_PARAM_ERROR:
182            PyErr_SetString(PyExc_ValueError,
183                            "the bz2 library has received wrong "
184                            "parameters");
185            ret = 1;
186            break;
187
188        case BZ_MEM_ERROR:
189            PyErr_NoMemory();
190            ret = 1;
191            break;
192
193        case BZ_DATA_ERROR:
194        case BZ_DATA_ERROR_MAGIC:
195            PyErr_SetString(PyExc_IOError, "invalid data stream");
196            ret = 1;
197            break;
198
199        case BZ_IO_ERROR:
200            PyErr_SetString(PyExc_IOError, "unknown IO error");
201            ret = 1;
202            break;
203
204        case BZ_UNEXPECTED_EOF:
205            PyErr_SetString(PyExc_EOFError,
206                            "compressed file ended before the "
207                            "logical end-of-stream was detected");
208            ret = 1;
209            break;
210
211        case BZ_SEQUENCE_ERROR:
212            PyErr_SetString(PyExc_RuntimeError,
213                            "wrong sequence of bz2 library "
214                            "commands used");
215            ret = 1;
216            break;
217    }
218    return ret;
219}
220
/* Growth step for small buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Growth step for large buffers; smaller where int is narrower than
 * 4 bytes. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow to:
 *   - up to and including SMALLCHUNK, add SMALLCHUNK;
 *   - above that and up to and including BIGCHUNK, double;
 *   - beyond BIGCHUNK, add BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
247
248/* This is a hacked version of Python's fileobject.c:get_line(). */
249static PyObject *
250Util_GetLine(BZ2FileObject *f, int n)
251{
252    char c;
253    char *buf, *end;
254    size_t total_v_size;        /* total # of slots in buffer */
255    size_t used_v_size;         /* # used slots in buffer */
256    size_t increment;       /* amount to increment the buffer */
257    PyObject *v;
258    int bzerror;
259    int bytes_read;
260    int newlinetypes = f->f_newlinetypes;
261    int skipnextlf = f->f_skipnextlf;
262    int univ_newline = f->f_univ_newline;
263
264    total_v_size = n > 0 ? n : 100;
265    v = PyString_FromStringAndSize((char *)NULL, total_v_size);
266    if (v == NULL)
267        return NULL;
268
269    buf = BUF(v);
270    end = buf + total_v_size;
271
272    for (;;) {
273        Py_BEGIN_ALLOW_THREADS
274        while (buf != end) {
275            bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276            f->pos++;
277            if (bytes_read == 0) break;
278            if (univ_newline) {
279                if (skipnextlf) {
280                    skipnextlf = 0;
281                    if (c == '\n') {
282                        /* Seeing a \n here with skipnextlf true means we
283                         * saw a \r before.
284                         */
285                        newlinetypes |= NEWLINE_CRLF;
286                        if (bzerror != BZ_OK) break;
287                        bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
288                        f->pos++;
289                        if (bytes_read == 0) break;
290                    } else {
291                        newlinetypes |= NEWLINE_CR;
292                    }
293                }
294                if (c == '\r') {
295                    skipnextlf = 1;
296                    c = '\n';
297                } else if (c == '\n')
298                    newlinetypes |= NEWLINE_LF;
299            }
300            *buf++ = c;
301            if (bzerror != BZ_OK || c == '\n') break;
302        }
303        if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
304            newlinetypes |= NEWLINE_CR;
305        Py_END_ALLOW_THREADS
306        f->f_newlinetypes = newlinetypes;
307        f->f_skipnextlf = skipnextlf;
308        if (bzerror == BZ_STREAM_END) {
309            f->size = f->pos;
310            f->mode = MODE_READ_EOF;
311            break;
312        } else if (bzerror != BZ_OK) {
313            Util_CatchBZ2Error(bzerror);
314            Py_DECREF(v);
315            return NULL;
316        }
317        if (c == '\n')
318            break;
319        /* Must be because buf == end */
320        if (n > 0)
321            break;
322        used_v_size = total_v_size;
323        increment = total_v_size >> 2; /* mild exponential growth */
324        total_v_size += increment;
325        if (total_v_size > INT_MAX) {
326            PyErr_SetString(PyExc_OverflowError,
327                "line is longer than a Python string can hold");
328            Py_DECREF(v);
329            return NULL;
330        }
331        if (_PyString_Resize(&v, total_v_size) < 0)
332            return NULL;
333        buf = BUF(v) + used_v_size;
334        end = BUF(v) + total_v_size;
335    }
336
337    used_v_size = buf - BUF(v);
338    if (used_v_size != total_v_size)
339        _PyString_Resize(&v, used_v_size);
340    return v;
341}
342
343/* This is a hacked version of Python's
344 * fileobject.c:Py_UniversalNewlineFread(). */
345size_t
346Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
347                     char* buf, size_t n, BZ2FileObject *f)
348{
349    char *dst = buf;
350    int newlinetypes, skipnextlf;
351
352    assert(buf != NULL);
353    assert(stream != NULL);
354
355    if (!f->f_univ_newline)
356        return BZ2_bzRead(bzerror, stream, buf, n);
357
358    newlinetypes = f->f_newlinetypes;
359    skipnextlf = f->f_skipnextlf;
360
361    /* Invariant:  n is the number of bytes remaining to be filled
362     * in the buffer.
363     */
364    while (n) {
365        size_t nread;
366        int shortread;
367        char *src = dst;
368
369        nread = BZ2_bzRead(bzerror, stream, dst, n);
370        assert(nread <= n);
371        n -= nread; /* assuming 1 byte out for each in; will adjust */
372        shortread = n != 0;             /* true iff EOF or error */
373        while (nread--) {
374            char c = *src++;
375            if (c == '\r') {
376                /* Save as LF and set flag to skip next LF. */
377                *dst++ = '\n';
378                skipnextlf = 1;
379            }
380            else if (skipnextlf && c == '\n') {
381                /* Skip LF, and remember we saw CR LF. */
382                skipnextlf = 0;
383                newlinetypes |= NEWLINE_CRLF;
384                ++n;
385            }
386            else {
387                /* Normal char to be stored in buffer.  Also
388                 * update the newlinetypes flag if either this
389                 * is an LF or the previous char was a CR.
390                 */
391                if (c == '\n')
392                    newlinetypes |= NEWLINE_LF;
393                else if (skipnextlf)
394                    newlinetypes |= NEWLINE_CR;
395                *dst++ = c;
396                skipnextlf = 0;
397            }
398        }
399        if (shortread) {
400            /* If this is EOF, update type flags. */
401            if (skipnextlf && *bzerror == BZ_STREAM_END)
402                newlinetypes |= NEWLINE_CR;
403            break;
404        }
405    }
406    f->f_newlinetypes = newlinetypes;
407    f->f_skipnextlf = skipnextlf;
408    return dst - buf;
409}
410
411/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
412static void
413Util_DropReadAhead(BZ2FileObject *f)
414{
415    if (f->f_buf != NULL) {
416        PyMem_Free(f->f_buf);
417        f->f_buf = NULL;
418    }
419}
420
421/* This is a hacked version of Python's fileobject.c:readahead(). */
422static int
423Util_ReadAhead(BZ2FileObject *f, int bufsize)
424{
425    int chunksize;
426    int bzerror;
427
428    if (f->f_buf != NULL) {
429        if((f->f_bufend - f->f_bufptr) >= 1)
430            return 0;
431        else
432            Util_DropReadAhead(f);
433    }
434    if (f->mode == MODE_READ_EOF) {
435        f->f_bufptr = f->f_buf;
436        f->f_bufend = f->f_buf;
437        return 0;
438    }
439    if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
440        PyErr_NoMemory();
441        return -1;
442    }
443    Py_BEGIN_ALLOW_THREADS
444    chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
445                                     bufsize, f);
446    Py_END_ALLOW_THREADS
447    f->pos += chunksize;
448    if (bzerror == BZ_STREAM_END) {
449        f->size = f->pos;
450        f->mode = MODE_READ_EOF;
451    } else if (bzerror != BZ_OK) {
452        Util_CatchBZ2Error(bzerror);
453        Util_DropReadAhead(f);
454        return -1;
455    }
456    f->f_bufptr = f->f_buf;
457    f->f_bufend = f->f_buf + chunksize;
458    return 0;
459}
460
461/* This is a hacked version of Python's
462 * fileobject.c:readahead_get_line_skip(). */
463static PyStringObject *
464Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
465{
466    PyStringObject* s;
467    char *bufptr;
468    char *buf;
469    int len;
470
471    if (f->f_buf == NULL)
472        if (Util_ReadAhead(f, bufsize) < 0)
473            return NULL;
474
475    len = f->f_bufend - f->f_bufptr;
476    if (len == 0)
477        return (PyStringObject *)
478            PyString_FromStringAndSize(NULL, skip);
479    bufptr = memchr(f->f_bufptr, '\n', len);
480    if (bufptr != NULL) {
481        bufptr++;                               /* Count the '\n' */
482        len = bufptr - f->f_bufptr;
483        s = (PyStringObject *)
484            PyString_FromStringAndSize(NULL, skip+len);
485        if (s == NULL)
486            return NULL;
487        memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
488        f->f_bufptr = bufptr;
489        if (bufptr == f->f_bufend)
490            Util_DropReadAhead(f);
491    } else {
492        bufptr = f->f_bufptr;
493        buf = f->f_buf;
494        f->f_buf = NULL;                /* Force new readahead buffer */
495        s = Util_ReadAheadGetLineSkip(f, skip+len,
496                                      bufsize + (bufsize>>2));
497        if (s == NULL) {
498            PyMem_Free(buf);
499            return NULL;
500        }
501        memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
502        PyMem_Free(buf);
503    }
504    return s;
505}
506
507/* ===================================================================== */
508/* Methods of BZ2File. */
509
510PyDoc_STRVAR(BZ2File_read__doc__,
511"read([size]) -> string\n\
512\n\
513Read at most size uncompressed bytes, returned as a string. If the size\n\
514argument is negative or omitted, read until EOF is reached.\n\
515");
516
517/* This is a hacked version of Python's fileobject.c:file_read(). */
518static PyObject *
519BZ2File_read(BZ2FileObject *self, PyObject *args)
520{
521    long bytesrequested = -1;
522    size_t bytesread, buffersize, chunksize;
523    int bzerror;
524    PyObject *ret = NULL;
525
526    if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
527        return NULL;
528
529    ACQUIRE_LOCK(self);
530    switch (self->mode) {
531        case MODE_READ:
532            break;
533        case MODE_READ_EOF:
534            ret = PyString_FromString("");
535            goto cleanup;
536        case MODE_CLOSED:
537            PyErr_SetString(PyExc_ValueError,
538                            "I/O operation on closed file");
539            goto cleanup;
540        default:
541            PyErr_SetString(PyExc_IOError,
542                            "file is not ready for reading");
543            goto cleanup;
544    }
545
546    /* refuse to mix with f.next() */
547    if (check_iterbuffered(self))
548        goto cleanup;
549
550    if (bytesrequested < 0)
551        buffersize = Util_NewBufferSize((size_t)0);
552    else
553        buffersize = bytesrequested;
554    if (buffersize > INT_MAX) {
555        PyErr_SetString(PyExc_OverflowError,
556                        "requested number of bytes is "
557                        "more than a Python string can hold");
558        goto cleanup;
559    }
560    ret = PyString_FromStringAndSize((char *)NULL, buffersize);
561    if (ret == NULL)
562        goto cleanup;
563    bytesread = 0;
564
565    for (;;) {
566        Py_BEGIN_ALLOW_THREADS
567        chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
568                                         BUF(ret)+bytesread,
569                                         buffersize-bytesread,
570                                         self);
571        self->pos += chunksize;
572        Py_END_ALLOW_THREADS
573        bytesread += chunksize;
574        if (bzerror == BZ_STREAM_END) {
575            self->size = self->pos;
576            self->mode = MODE_READ_EOF;
577            break;
578        } else if (bzerror != BZ_OK) {
579            Util_CatchBZ2Error(bzerror);
580            Py_DECREF(ret);
581            ret = NULL;
582            goto cleanup;
583        }
584        if (bytesrequested < 0) {
585            buffersize = Util_NewBufferSize(buffersize);
586            if (_PyString_Resize(&ret, buffersize) < 0)
587                goto cleanup;
588        } else {
589            break;
590        }
591    }
592    if (bytesread != buffersize)
593        _PyString_Resize(&ret, bytesread);
594
595cleanup:
596    RELEASE_LOCK(self);
597    return ret;
598}
599
600PyDoc_STRVAR(BZ2File_readline__doc__,
601"readline([size]) -> string\n\
602\n\
603Return the next line from the file, as a string, retaining newline.\n\
604A non-negative size argument will limit the maximum number of bytes to\n\
605return (an incomplete line may be returned then). Return an empty\n\
606string at EOF.\n\
607");
608
609static PyObject *
610BZ2File_readline(BZ2FileObject *self, PyObject *args)
611{
612    PyObject *ret = NULL;
613    int sizehint = -1;
614
615    if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
616        return NULL;
617
618    ACQUIRE_LOCK(self);
619    switch (self->mode) {
620        case MODE_READ:
621            break;
622        case MODE_READ_EOF:
623            ret = PyString_FromString("");
624            goto cleanup;
625        case MODE_CLOSED:
626            PyErr_SetString(PyExc_ValueError,
627                            "I/O operation on closed file");
628            goto cleanup;
629        default:
630            PyErr_SetString(PyExc_IOError,
631                            "file is not ready for reading");
632            goto cleanup;
633    }
634
635    /* refuse to mix with f.next() */
636    if (check_iterbuffered(self))
637        goto cleanup;
638
639    if (sizehint == 0)
640        ret = PyString_FromString("");
641    else
642        ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
643
644cleanup:
645    RELEASE_LOCK(self);
646    return ret;
647}
648
649PyDoc_STRVAR(BZ2File_readlines__doc__,
650"readlines([size]) -> list\n\
651\n\
652Call readline() repeatedly and return a list of lines read.\n\
653The optional size argument, if given, is an approximate bound on the\n\
654total number of bytes in the lines returned.\n\
655");
656
657/* This is a hacked version of Python's fileobject.c:file_readlines(). */
658static PyObject *
659BZ2File_readlines(BZ2FileObject *self, PyObject *args)
660{
661    long sizehint = 0;
662    PyObject *list = NULL;
663    PyObject *line;
664    char small_buffer[SMALLCHUNK];
665    char *buffer = small_buffer;
666    size_t buffersize = SMALLCHUNK;
667    PyObject *big_buffer = NULL;
668    size_t nfilled = 0;
669    size_t nread;
670    size_t totalread = 0;
671    char *p, *q, *end;
672    int err;
673    int shortread = 0;
674    int bzerror;
675
676    if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
677        return NULL;
678
679    ACQUIRE_LOCK(self);
680    switch (self->mode) {
681        case MODE_READ:
682            break;
683        case MODE_READ_EOF:
684            list = PyList_New(0);
685            goto cleanup;
686        case MODE_CLOSED:
687            PyErr_SetString(PyExc_ValueError,
688                            "I/O operation on closed file");
689            goto cleanup;
690        default:
691            PyErr_SetString(PyExc_IOError,
692                            "file is not ready for reading");
693            goto cleanup;
694    }
695
696    /* refuse to mix with f.next() */
697    if (check_iterbuffered(self))
698        goto cleanup;
699
700    if ((list = PyList_New(0)) == NULL)
701        goto cleanup;
702
703    for (;;) {
704        Py_BEGIN_ALLOW_THREADS
705        nread = Util_UnivNewlineRead(&bzerror, self->fp,
706                                     buffer+nfilled,
707                                     buffersize-nfilled, self);
708        self->pos += nread;
709        Py_END_ALLOW_THREADS
710        if (bzerror == BZ_STREAM_END) {
711            self->size = self->pos;
712            self->mode = MODE_READ_EOF;
713            if (nread == 0) {
714                sizehint = 0;
715                break;
716            }
717            shortread = 1;
718        } else if (bzerror != BZ_OK) {
719            Util_CatchBZ2Error(bzerror);
720          error:
721            Py_DECREF(list);
722            list = NULL;
723            goto cleanup;
724        }
725        totalread += nread;
726        p = memchr(buffer+nfilled, '\n', nread);
727        if (!shortread && p == NULL) {
728            /* Need a larger buffer to fit this line */
729            nfilled += nread;
730            buffersize *= 2;
731            if (buffersize > INT_MAX) {
732                PyErr_SetString(PyExc_OverflowError,
733                "line is longer than a Python string can hold");
734                goto error;
735            }
736            if (big_buffer == NULL) {
737                /* Create the big buffer */
738                big_buffer = PyString_FromStringAndSize(
739                    NULL, buffersize);
740                if (big_buffer == NULL)
741                    goto error;
742                buffer = PyString_AS_STRING(big_buffer);
743                memcpy(buffer, small_buffer, nfilled);
744            }
745            else {
746                /* Grow the big buffer */
747                _PyString_Resize(&big_buffer, buffersize);
748                buffer = PyString_AS_STRING(big_buffer);
749            }
750            continue;
751        }
752        end = buffer+nfilled+nread;
753        q = buffer;
754        while (p != NULL) {
755            /* Process complete lines */
756            p++;
757            line = PyString_FromStringAndSize(q, p-q);
758            if (line == NULL)
759                goto error;
760            err = PyList_Append(list, line);
761            Py_DECREF(line);
762            if (err != 0)
763                goto error;
764            q = p;
765            p = memchr(q, '\n', end-q);
766        }
767        /* Move the remaining incomplete line to the start */
768        nfilled = end-q;
769        memmove(buffer, q, nfilled);
770        if (sizehint > 0)
771            if (totalread >= (size_t)sizehint)
772                break;
773        if (shortread) {
774            sizehint = 0;
775            break;
776        }
777    }
778    if (nfilled != 0) {
779        /* Partial last line */
780        line = PyString_FromStringAndSize(buffer, nfilled);
781        if (line == NULL)
782            goto error;
783        if (sizehint > 0) {
784            /* Need to complete the last line */
785            PyObject *rest = Util_GetLine(self, 0);
786            if (rest == NULL) {
787                Py_DECREF(line);
788                goto error;
789            }
790            PyString_Concat(&line, rest);
791            Py_DECREF(rest);
792            if (line == NULL)
793                goto error;
794        }
795        err = PyList_Append(list, line);
796        Py_DECREF(line);
797        if (err != 0)
798            goto error;
799    }
800
801  cleanup:
802    RELEASE_LOCK(self);
803    if (big_buffer) {
804        Py_DECREF(big_buffer);
805    }
806    return list;
807}
808
/* Docstring for the xreadlines() method. */
PyDoc_STRVAR(BZ2File_xreadlines__doc__,
"xreadlines() -> self\n\
\n\
For backward compatibility. BZ2File objects now include the performance\n\
optimizations previously implemented in the xreadlines module.\n\
");
815
816PyDoc_STRVAR(BZ2File_write__doc__,
817"write(data) -> None\n\
818\n\
819Write the 'data' string to file. Note that due to buffering, close() may\n\
820be needed before the file on disk reflects the data written.\n\
821");
822
823/* This is a hacked version of Python's fileobject.c:file_write(). */
824static PyObject *
825BZ2File_write(BZ2FileObject *self, PyObject *args)
826{
827    PyObject *ret = NULL;
828    Py_buffer pbuf;
829    char *buf;
830    int len;
831    int bzerror;
832
833    if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
834        return NULL;
835    buf = pbuf.buf;
836    len = pbuf.len;
837
838    ACQUIRE_LOCK(self);
839    switch (self->mode) {
840        case MODE_WRITE:
841            break;
842
843        case MODE_CLOSED:
844            PyErr_SetString(PyExc_ValueError,
845                            "I/O operation on closed file");
846            goto cleanup;
847
848        default:
849            PyErr_SetString(PyExc_IOError,
850                            "file is not ready for writing");
851            goto cleanup;
852    }
853
854    self->f_softspace = 0;
855
856    Py_BEGIN_ALLOW_THREADS
857    BZ2_bzWrite (&bzerror, self->fp, buf, len);
858    self->pos += len;
859    Py_END_ALLOW_THREADS
860
861    if (bzerror != BZ_OK) {
862        Util_CatchBZ2Error(bzerror);
863        goto cleanup;
864    }
865
866    Py_INCREF(Py_None);
867    ret = Py_None;
868
869cleanup:
870    PyBuffer_Release(&pbuf);
871    RELEASE_LOCK(self);
872    return ret;
873}
874
875PyDoc_STRVAR(BZ2File_writelines__doc__,
876"writelines(sequence_of_strings) -> None\n\
877\n\
878Write the sequence of strings to the file. Note that newlines are not\n\
879added. The sequence can be any iterable object producing strings. This is\n\
880equivalent to calling write() for each string.\n\
881");
882
883/* This is a hacked version of Python's fileobject.c:file_writelines(). */
884static PyObject *
885BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
886{
887#define CHUNKSIZE 1000
888    PyObject *list = NULL;
889    PyObject *iter = NULL;
890    PyObject *ret = NULL;
891    PyObject *line;
892    int i, j, index, len, islist;
893    int bzerror;
894
895    ACQUIRE_LOCK(self);
896    switch (self->mode) {
897        case MODE_WRITE:
898            break;
899
900        case MODE_CLOSED:
901            PyErr_SetString(PyExc_ValueError,
902                            "I/O operation on closed file");
903            goto error;
904
905        default:
906            PyErr_SetString(PyExc_IOError,
907                            "file is not ready for writing");
908            goto error;
909    }
910
911    islist = PyList_Check(seq);
912    if  (!islist) {
913        iter = PyObject_GetIter(seq);
914        if (iter == NULL) {
915            PyErr_SetString(PyExc_TypeError,
916                "writelines() requires an iterable argument");
917            goto error;
918        }
919        list = PyList_New(CHUNKSIZE);
920        if (list == NULL)
921            goto error;
922    }
923
924    /* Strategy: slurp CHUNKSIZE lines into a private list,
925       checking that they are all strings, then write that list
926       without holding the interpreter lock, then come back for more. */
927    for (index = 0; ; index += CHUNKSIZE) {
928        if (islist) {
929            Py_XDECREF(list);
930            list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
931            if (list == NULL)
932                goto error;
933            j = PyList_GET_SIZE(list);
934        }
935        else {
936            for (j = 0; j < CHUNKSIZE; j++) {
937                line = PyIter_Next(iter);
938                if (line == NULL) {
939                    if (PyErr_Occurred())
940                        goto error;
941                    break;
942                }
943                PyList_SetItem(list, j, line);
944            }
945        }
946        if (j == 0)
947            break;
948
949        /* Check that all entries are indeed strings. If not,
950           apply the same rules as for file.write() and
951           convert the rets to strings. This is slow, but
952           seems to be the only way since all conversion APIs
953           could potentially execute Python code. */
954        for (i = 0; i < j; i++) {
955            PyObject *v = PyList_GET_ITEM(list, i);
956            if (!PyString_Check(v)) {
957                const char *buffer;
958                Py_ssize_t len;
959                if (PyObject_AsCharBuffer(v, &buffer, &len)) {
960                    PyErr_SetString(PyExc_TypeError,
961                                    "writelines() "
962                                    "argument must be "
963                                    "a sequence of "
964                                    "strings");
965                    goto error;
966                }
967                line = PyString_FromStringAndSize(buffer,
968                                                  len);
969                if (line == NULL)
970                    goto error;
971                Py_DECREF(v);
972                PyList_SET_ITEM(list, i, line);
973            }
974        }
975
976        self->f_softspace = 0;
977
978        /* Since we are releasing the global lock, the
979           following code may *not* execute Python code. */
980        Py_BEGIN_ALLOW_THREADS
981        for (i = 0; i < j; i++) {
982            line = PyList_GET_ITEM(list, i);
983            len = PyString_GET_SIZE(line);
984            BZ2_bzWrite (&bzerror, self->fp,
985                         PyString_AS_STRING(line), len);
986            if (bzerror != BZ_OK) {
987                Py_BLOCK_THREADS
988                Util_CatchBZ2Error(bzerror);
989                goto error;
990            }
991        }
992        Py_END_ALLOW_THREADS
993
994        if (j < CHUNKSIZE)
995            break;
996    }
997
998    Py_INCREF(Py_None);
999    ret = Py_None;
1000
1001  error:
1002    RELEASE_LOCK(self);
1003    Py_XDECREF(list);
1004    Py_XDECREF(iter);
1005    return ret;
1006#undef CHUNKSIZE
1007}
1008
/* Docstring for the seek() method. */
PyDoc_STRVAR(BZ2File_seek__doc__,
"seek(offset [, whence]) -> None\n\
\n\
Move to new file position. Argument offset is a byte count. Optional\n\
argument whence defaults to 0 (offset from start of file, offset\n\
should be >= 0); other values are 1 (move relative to current position,\n\
positive or negative), and 2 (move relative to end of file, usually\n\
negative, although many platforms allow seeking beyond the end of a file).\n\
\n\
Note that seeking of bz2 files is emulated, and depending on the parameters\n\
the operation may be extremely slow.\n\
");
1021
1022static PyObject *
1023BZ2File_seek(BZ2FileObject *self, PyObject *args)
1024{
1025    int where = 0;
1026    PyObject *offobj;
1027    Py_off_t offset;
1028    char small_buffer[SMALLCHUNK];
1029    char *buffer = small_buffer;
1030    size_t buffersize = SMALLCHUNK;
1031    Py_off_t bytesread = 0;
1032    size_t readsize;
1033    int chunksize;
1034    int bzerror;
1035    PyObject *ret = NULL;
1036
1037    if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1038        return NULL;
1039#if !defined(HAVE_LARGEFILE_SUPPORT)
1040    offset = PyInt_AsLong(offobj);
1041#else
1042    offset = PyLong_Check(offobj) ?
1043        PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1044#endif
1045    if (PyErr_Occurred())
1046        return NULL;
1047
1048    ACQUIRE_LOCK(self);
1049    Util_DropReadAhead(self);
1050    switch (self->mode) {
1051        case MODE_READ:
1052        case MODE_READ_EOF:
1053            break;
1054
1055        case MODE_CLOSED:
1056            PyErr_SetString(PyExc_ValueError,
1057                            "I/O operation on closed file");
1058            goto cleanup;
1059
1060        default:
1061            PyErr_SetString(PyExc_IOError,
1062                            "seek works only while reading");
1063            goto cleanup;
1064    }
1065
1066    if (where == 2) {
1067        if (self->size == -1) {
1068            assert(self->mode != MODE_READ_EOF);
1069            for (;;) {
1070                Py_BEGIN_ALLOW_THREADS
1071                chunksize = Util_UnivNewlineRead(
1072                                &bzerror, self->fp,
1073                                buffer, buffersize,
1074                                self);
1075                self->pos += chunksize;
1076                Py_END_ALLOW_THREADS
1077
1078                bytesread += chunksize;
1079                if (bzerror == BZ_STREAM_END) {
1080                    break;
1081                } else if (bzerror != BZ_OK) {
1082                    Util_CatchBZ2Error(bzerror);
1083                    goto cleanup;
1084                }
1085            }
1086            self->mode = MODE_READ_EOF;
1087            self->size = self->pos;
1088            bytesread = 0;
1089        }
1090        offset = self->size + offset;
1091    } else if (where == 1) {
1092        offset = self->pos + offset;
1093    }
1094
1095    /* Before getting here, offset must be the absolute position the file
1096     * pointer should be set to. */
1097
1098    if (offset >= self->pos) {
1099        /* we can move forward */
1100        offset -= self->pos;
1101    } else {
1102        /* we cannot move back, so rewind the stream */
1103        BZ2_bzReadClose(&bzerror, self->fp);
1104        if (self->fp) {
1105            PyFile_DecUseCount((PyFileObject *)self->file);
1106            self->fp = NULL;
1107        }
1108        if (bzerror != BZ_OK) {
1109            Util_CatchBZ2Error(bzerror);
1110            goto cleanup;
1111        }
1112        ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1113        if (!ret)
1114            goto cleanup;
1115        Py_DECREF(ret);
1116        ret = NULL;
1117        self->pos = 0;
1118        self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1119                                  0, 0, NULL, 0);
1120        if (self->fp)
1121            PyFile_IncUseCount((PyFileObject *)self->file);
1122        if (bzerror != BZ_OK) {
1123            Util_CatchBZ2Error(bzerror);
1124            goto cleanup;
1125        }
1126        self->mode = MODE_READ;
1127    }
1128
1129    if (offset <= 0 || self->mode == MODE_READ_EOF)
1130        goto exit;
1131
1132    /* Before getting here, offset must be set to the number of bytes
1133     * to walk forward. */
1134    for (;;) {
1135        if (offset-bytesread > buffersize)
1136            readsize = buffersize;
1137        else
1138            /* offset might be wider that readsize, but the result
1139             * of the subtraction is bound by buffersize (see the
1140             * condition above). buffersize is 8192. */
1141            readsize = (size_t)(offset-bytesread);
1142        Py_BEGIN_ALLOW_THREADS
1143        chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1144                                         buffer, readsize, self);
1145        self->pos += chunksize;
1146        Py_END_ALLOW_THREADS
1147        bytesread += chunksize;
1148        if (bzerror == BZ_STREAM_END) {
1149            self->size = self->pos;
1150            self->mode = MODE_READ_EOF;
1151            break;
1152        } else if (bzerror != BZ_OK) {
1153            Util_CatchBZ2Error(bzerror);
1154            goto cleanup;
1155        }
1156        if (bytesread == offset)
1157            break;
1158    }
1159
1160exit:
1161    Py_INCREF(Py_None);
1162    ret = Py_None;
1163
1164cleanup:
1165    RELEASE_LOCK(self);
1166    return ret;
1167}
1168
1169PyDoc_STRVAR(BZ2File_tell__doc__,
1170"tell() -> int\n\
1171\n\
1172Return the current file position, an integer (may be a long integer).\n\
1173");
1174
1175static PyObject *
1176BZ2File_tell(BZ2FileObject *self, PyObject *args)
1177{
1178    PyObject *ret = NULL;
1179
1180    if (self->mode == MODE_CLOSED) {
1181        PyErr_SetString(PyExc_ValueError,
1182                        "I/O operation on closed file");
1183        goto cleanup;
1184    }
1185
1186#if !defined(HAVE_LARGEFILE_SUPPORT)
1187    ret = PyInt_FromLong(self->pos);
1188#else
1189    ret = PyLong_FromLongLong(self->pos);
1190#endif
1191
1192cleanup:
1193    return ret;
1194}
1195
1196PyDoc_STRVAR(BZ2File_close__doc__,
1197"close() -> None or (perhaps) an integer\n\
1198\n\
1199Close the file. Sets data attribute .closed to true. A closed file\n\
1200cannot be used for further I/O operations. close() may be called more\n\
1201than once without error.\n\
1202");
1203
1204static PyObject *
1205BZ2File_close(BZ2FileObject *self)
1206{
1207    PyObject *ret = NULL;
1208    int bzerror = BZ_OK;
1209
1210    ACQUIRE_LOCK(self);
1211    switch (self->mode) {
1212        case MODE_READ:
1213        case MODE_READ_EOF:
1214            BZ2_bzReadClose(&bzerror, self->fp);
1215            break;
1216        case MODE_WRITE:
1217            BZ2_bzWriteClose(&bzerror, self->fp,
1218                             0, NULL, NULL);
1219            break;
1220    }
1221    if (self->fp) {
1222        PyFile_DecUseCount((PyFileObject *)self->file);
1223        self->fp = NULL;
1224    }
1225    self->mode = MODE_CLOSED;
1226    ret = PyObject_CallMethod(self->file, "close", NULL);
1227    if (bzerror != BZ_OK) {
1228        Util_CatchBZ2Error(bzerror);
1229        Py_XDECREF(ret);
1230        ret = NULL;
1231    }
1232
1233    RELEASE_LOCK(self);
1234    return ret;
1235}
1236
1237PyDoc_STRVAR(BZ2File_enter_doc,
1238"__enter__() -> self.");
1239
1240static PyObject *
1241BZ2File_enter(BZ2FileObject *self)
1242{
1243    if (self->mode == MODE_CLOSED) {
1244        PyErr_SetString(PyExc_ValueError,
1245            "I/O operation on closed file");
1246        return NULL;
1247    }
1248    Py_INCREF(self);
1249    return (PyObject *) self;
1250}
1251
1252PyDoc_STRVAR(BZ2File_exit_doc,
1253"__exit__(*excinfo) -> None.  Closes the file.");
1254
1255static PyObject *
1256BZ2File_exit(BZ2FileObject *self, PyObject *args)
1257{
1258    PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1259    if (!ret)
1260        /* If error occurred, pass through */
1261        return NULL;
1262    Py_DECREF(ret);
1263    Py_RETURN_NONE;
1264}
1265
1266
/* Forward declaration: BZ2File_getiter is defined further down in the file
 * but is referenced by the "xreadlines" entry of the table below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table of bz2.BZ2File: Python-visible name, C implementation,
 * calling convention and docstring for each method.
 *
 * NOTE(review): "xreadlines" is registered as METH_VARARGS although
 * BZ2File_getiter only declares a self parameter, and "tell" is registered
 * as METH_NOARGS although BZ2File_tell declares an args parameter.  The
 * extra argument is unused in both cases.
 */
static PyMethodDef BZ2File_methods[] = {
    {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
    {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
    {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
    {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
    {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
    {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
    {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
    {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
    {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
    {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
    {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
    {NULL,              NULL}           /* sentinel */
};
1283
1284
1285/* ===================================================================== */
1286/* Getters and setters of BZ2File. */
1287
1288/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1289static PyObject *
1290BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1291{
1292    switch (self->f_newlinetypes) {
1293    case NEWLINE_UNKNOWN:
1294        Py_INCREF(Py_None);
1295        return Py_None;
1296    case NEWLINE_CR:
1297        return PyString_FromString("\r");
1298    case NEWLINE_LF:
1299        return PyString_FromString("\n");
1300    case NEWLINE_CR|NEWLINE_LF:
1301        return Py_BuildValue("(ss)", "\r", "\n");
1302    case NEWLINE_CRLF:
1303        return PyString_FromString("\r\n");
1304    case NEWLINE_CR|NEWLINE_CRLF:
1305        return Py_BuildValue("(ss)", "\r", "\r\n");
1306    case NEWLINE_LF|NEWLINE_CRLF:
1307        return Py_BuildValue("(ss)", "\n", "\r\n");
1308    case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1309        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1310    default:
1311        PyErr_Format(PyExc_SystemError,
1312                     "Unknown newlines value 0x%x\n",
1313                     self->f_newlinetypes);
1314        return NULL;
1315    }
1316}
1317
1318static PyObject *
1319BZ2File_get_closed(BZ2FileObject *self, void *closure)
1320{
1321    return PyInt_FromLong(self->mode == MODE_CLOSED);
1322}
1323
1324static PyObject *
1325BZ2File_get_mode(BZ2FileObject *self, void *closure)
1326{
1327    return PyObject_GetAttrString(self->file, "mode");
1328}
1329
1330static PyObject *
1331BZ2File_get_name(BZ2FileObject *self, void *closure)
1332{
1333    return PyObject_GetAttrString(self->file, "name");
1334}
1335
/* Computed attributes of bz2.BZ2File.  Every entry has a getter and a NULL
 * setter, so all four attributes are read-only.  "mode" and "name" are
 * looked up on the underlying file object by their getters. */
static PyGetSetDef BZ2File_getset[] = {
    {"closed", (getter)BZ2File_get_closed, NULL,
                    "True if the file is closed"},
    {"newlines", (getter)BZ2File_get_newlines, NULL,
                    "end-of-line convention used in this file"},
    {"mode", (getter)BZ2File_get_mode, NULL,
                    "file mode ('r', 'w', or 'U')"},
    {"name", (getter)BZ2File_get_name, NULL,
                    "file name"},
    {NULL}      /* Sentinel */
};
1347
1348
1349/* ===================================================================== */
1350/* Members of BZ2File_Type. */
1351
/* OFF(x) is the byte offset of field x inside BZ2FileObject; it is
 * redefined here because the macro is reused per object type. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields of BZ2FileObject exposed directly as attributes.  The flags
 * field of "softspace" is 0, i.e. no read-only restriction is requested. */
static PyMemberDef BZ2File_members[] = {
    {"softspace",       T_INT,          OFF(f_softspace), 0,
     "flag indicating that a space needs to be printed; used by print"},
    {NULL}      /* Sentinel */
};
1360
1361/* ===================================================================== */
1362/* Slot definitions for BZ2File_Type. */
1363
/* tp_init of bz2.BZ2File: BZ2File(filename, mode, buffering, compresslevel).
 *
 * Validates compresslevel (1..9) and the mode string, opens the underlying
 * Python file object in "rb" or "wb" mode, allocates the per-object lock
 * (WITH_THREAD builds only) and opens a libbz2 reader or writer on top of
 * the file's FILE*.
 *
 * Returns 0 on success and -1 with an exception set on failure.  On a
 * failure after the file was opened, the file reference and the lock are
 * released again.
 */
static int
BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"filename", "mode", "buffering",
                                   "compresslevel", 0};
    PyObject *name;
    char *mode = "r";
    int buffering = -1;
    int compresslevel = 9;
    int bzerror;
    int mode_char = 0;      /* 'r' or 'w' once seen in the mode string */

    /* -1 marks the decompressed size as not yet known. */
    self->size = -1;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
                                     kwlist, &name, &mode, &buffering,
                                     &compresslevel))
        return -1;

    if (compresslevel < 1 || compresslevel > 9) {
        PyErr_SetString(PyExc_ValueError,
                        "compresslevel must be between 1 and 9");
        return -1;
    }

    /* Scan the mode string one character at a time.  The current character
     * is examined before the terminator test, so an empty mode string
     * reaches the default case and is rejected as well. */
    for (;;) {
        int error = 0;
        switch (*mode) {
            case 'r':
            case 'w':
                /* Only one of 'r'/'w' may appear, and only once. */
                if (mode_char)
                    error = 1;
                mode_char = *mode;
                break;

            case 'b':
                /* Accepted and ignored: the file is always opened in
                 * binary mode below. */
                break;

            case 'U':
#ifdef __VMS
                self->f_univ_newline = 0;
#else
                self->f_univ_newline = 1;
#endif
                break;

            default:
                error = 1;
                break;
        }
        if (error) {
            PyErr_Format(PyExc_ValueError,
                         "invalid mode char %c", *mode);
            return -1;
        }
        mode++;
        if (*mode == '\0')
            break;
    }

    /* A mode made only of 'b'/'U' characters means reading. */
    if (mode_char == 0) {
        mode_char = 'r';
    }

    mode = (mode_char == 'r') ? "rb" : "wb";

    /* Equivalent to calling file(name, mode, buffering) from Python. */
    self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
                                       name, mode, buffering);
    if (self->file == NULL)
        return -1;

    /* From now on, we have stuff to dealloc, so jump to error label
     * instead of returning */

#ifdef WITH_THREAD
    self->lock = PyThread_allocate_lock();
    if (!self->lock) {
        PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
        goto error;
    }
#endif

    if (mode_char == 'r')
        self->fp = BZ2_bzReadOpen(&bzerror,
                                  PyFile_AsFile(self->file),
                                  0, 0, NULL, 0);
    else
        self->fp = BZ2_bzWriteOpen(&bzerror,
                                   PyFile_AsFile(self->file),
                                   compresslevel, 0, 0);

    if (bzerror != BZ_OK) {
        Util_CatchBZ2Error(bzerror);
        goto error;
    }
    /* The use count is raised only once the bz2 handle exists; the close
     * and dealloc paths lower it again when self->fp is set. */
    PyFile_IncUseCount((PyFileObject *)self->file);

    self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;

    return 0;

error:
    Py_CLEAR(self->file);
#ifdef WITH_THREAD
    if (self->lock) {
        PyThread_free_lock(self->lock);
        self->lock = NULL;
    }
#endif
    return -1;
}
1475
1476static void
1477BZ2File_dealloc(BZ2FileObject *self)
1478{
1479    int bzerror;
1480#ifdef WITH_THREAD
1481    if (self->lock)
1482        PyThread_free_lock(self->lock);
1483#endif
1484    switch (self->mode) {
1485        case MODE_READ:
1486        case MODE_READ_EOF:
1487            BZ2_bzReadClose(&bzerror, self->fp);
1488            break;
1489        case MODE_WRITE:
1490            BZ2_bzWriteClose(&bzerror, self->fp,
1491                             0, NULL, NULL);
1492            break;
1493    }
1494    if (self->fp) {
1495        PyFile_DecUseCount((PyFileObject *)self->file);
1496        self->fp = NULL;
1497    }
1498    Util_DropReadAhead(self);
1499    Py_XDECREF(self->file);
1500    Py_TYPE(self)->tp_free((PyObject *)self);
1501}
1502
1503/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1504static PyObject *
1505BZ2File_getiter(BZ2FileObject *self)
1506{
1507    if (self->mode == MODE_CLOSED) {
1508        PyErr_SetString(PyExc_ValueError,
1509                        "I/O operation on closed file");
1510        return NULL;
1511    }
1512    Py_INCREF((PyObject*)self);
1513    return (PyObject *)self;
1514}
1515
1516/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1517#define READAHEAD_BUFSIZE 8192
1518static PyObject *
1519BZ2File_iternext(BZ2FileObject *self)
1520{
1521    PyStringObject* ret;
1522    ACQUIRE_LOCK(self);
1523    if (self->mode == MODE_CLOSED) {
1524        RELEASE_LOCK(self);
1525        PyErr_SetString(PyExc_ValueError,
1526                        "I/O operation on closed file");
1527        return NULL;
1528    }
1529    ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1530    RELEASE_LOCK(self);
1531    if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1532        Py_XDECREF(ret);
1533        return NULL;
1534    }
1535    return (PyObject *)ret;
1536}
1537
1538/* ===================================================================== */
1539/* BZ2File_Type definition. */
1540
/* Class docstring of bz2.BZ2File, assembled from two adjacent literals.
 *
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init initialises its buffering local to -1 -- confirm which one
 * is intended before changing either. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;

/* Type object of bz2.BZ2File.  It wires up the deallocator, the iterator
 * protocol (tp_iter / tp_iternext), the method, member and getset tables
 * and the initialiser; Py_TPFLAGS_BASETYPE is set in tp_flags.  Allocation
 * and construction use the generic PyType_GenericAlloc / PyType_GenericNew
 * helpers. */
static PyTypeObject BZ2File_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2File",              /*tp_name*/
    sizeof(BZ2FileObject),      /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2File_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2File__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    (getiterfunc)BZ2File_getiter, /*tp_iter*/
    (iternextfunc)BZ2File_iternext, /*tp_iternext*/
    BZ2File_methods,        /*tp_methods*/
    BZ2File_members,        /*tp_members*/
    BZ2File_getset,         /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2File_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1604
1605
1606/* ===================================================================== */
1607/* Methods of BZ2Comp. */
1608
1609PyDoc_STRVAR(BZ2Comp_compress__doc__,
1610"compress(data) -> string\n\
1611\n\
1612Provide more data to the compressor object. It will return chunks of\n\
1613compressed data whenever possible. When you've finished providing data\n\
1614to compress, call the flush() method to finish the compression process,\n\
1615and return what is left in the internal buffers.\n\
1616");
1617
1618static PyObject *
1619BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1620{
1621    Py_buffer pdata;
1622    char *data;
1623    int datasize;
1624    int bufsize = SMALLCHUNK;
1625    PY_LONG_LONG totalout;
1626    PyObject *ret = NULL;
1627    bz_stream *bzs = &self->bzs;
1628    int bzerror;
1629
1630    if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1631        return NULL;
1632    data = pdata.buf;
1633    datasize = pdata.len;
1634
1635    if (datasize == 0) {
1636        PyBuffer_Release(&pdata);
1637        return PyString_FromString("");
1638    }
1639
1640    ACQUIRE_LOCK(self);
1641    if (!self->running) {
1642        PyErr_SetString(PyExc_ValueError,
1643                        "this object was already flushed");
1644        goto error;
1645    }
1646
1647    ret = PyString_FromStringAndSize(NULL, bufsize);
1648    if (!ret)
1649        goto error;
1650
1651    bzs->next_in = data;
1652    bzs->avail_in = datasize;
1653    bzs->next_out = BUF(ret);
1654    bzs->avail_out = bufsize;
1655
1656    totalout = BZS_TOTAL_OUT(bzs);
1657
1658    for (;;) {
1659        Py_BEGIN_ALLOW_THREADS
1660        bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1661        Py_END_ALLOW_THREADS
1662        if (bzerror != BZ_RUN_OK) {
1663            Util_CatchBZ2Error(bzerror);
1664            goto error;
1665        }
1666        if (bzs->avail_in == 0)
1667            break; /* no more input data */
1668        if (bzs->avail_out == 0) {
1669            bufsize = Util_NewBufferSize(bufsize);
1670            if (_PyString_Resize(&ret, bufsize) < 0) {
1671                BZ2_bzCompressEnd(bzs);
1672                goto error;
1673            }
1674            bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1675                                        - totalout);
1676            bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1677        }
1678    }
1679
1680    _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1681
1682    RELEASE_LOCK(self);
1683    PyBuffer_Release(&pdata);
1684    return ret;
1685
1686error:
1687    RELEASE_LOCK(self);
1688    PyBuffer_Release(&pdata);
1689    Py_XDECREF(ret);
1690    return NULL;
1691}
1692
1693PyDoc_STRVAR(BZ2Comp_flush__doc__,
1694"flush() -> string\n\
1695\n\
1696Finish the compression process and return what is left in internal buffers.\n\
1697You must not use the compressor object after calling this method.\n\
1698");
1699
1700static PyObject *
1701BZ2Comp_flush(BZ2CompObject *self)
1702{
1703    int bufsize = SMALLCHUNK;
1704    PyObject *ret = NULL;
1705    bz_stream *bzs = &self->bzs;
1706    PY_LONG_LONG totalout;
1707    int bzerror;
1708
1709    ACQUIRE_LOCK(self);
1710    if (!self->running) {
1711        PyErr_SetString(PyExc_ValueError, "object was already "
1712                                          "flushed");
1713        goto error;
1714    }
1715    self->running = 0;
1716
1717    ret = PyString_FromStringAndSize(NULL, bufsize);
1718    if (!ret)
1719        goto error;
1720
1721    bzs->next_out = BUF(ret);
1722    bzs->avail_out = bufsize;
1723
1724    totalout = BZS_TOTAL_OUT(bzs);
1725
1726    for (;;) {
1727        Py_BEGIN_ALLOW_THREADS
1728        bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1729        Py_END_ALLOW_THREADS
1730        if (bzerror == BZ_STREAM_END) {
1731            break;
1732        } else if (bzerror != BZ_FINISH_OK) {
1733            Util_CatchBZ2Error(bzerror);
1734            goto error;
1735        }
1736        if (bzs->avail_out == 0) {
1737            bufsize = Util_NewBufferSize(bufsize);
1738            if (_PyString_Resize(&ret, bufsize) < 0)
1739                goto error;
1740            bzs->next_out = BUF(ret);
1741            bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1742                                        - totalout);
1743            bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1744        }
1745    }
1746
1747    if (bzs->avail_out != 0)
1748        _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1749
1750    RELEASE_LOCK(self);
1751    return ret;
1752
1753error:
1754    RELEASE_LOCK(self);
1755    Py_XDECREF(ret);
1756    return NULL;
1757}
1758
/* Method table of bz2.BZ2Compressor: compress(data) takes an argument
 * tuple, flush() takes no arguments. */
static PyMethodDef BZ2Comp_methods[] = {
    {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
     BZ2Comp_compress__doc__},
    {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
     BZ2Comp_flush__doc__},
    {NULL,              NULL}           /* sentinel */
};
1766
1767
1768/* ===================================================================== */
1769/* Slot definitions for BZ2Comp_Type. */
1770
1771static int
1772BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1773{
1774    int compresslevel = 9;
1775    int bzerror;
1776    static char *kwlist[] = {"compresslevel", 0};
1777
1778    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1779                                     kwlist, &compresslevel))
1780        return -1;
1781
1782    if (compresslevel < 1 || compresslevel > 9) {
1783        PyErr_SetString(PyExc_ValueError,
1784                        "compresslevel must be between 1 and 9");
1785        goto error;
1786    }
1787
1788#ifdef WITH_THREAD
1789    self->lock = PyThread_allocate_lock();
1790    if (!self->lock) {
1791        PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1792        goto error;
1793    }
1794#endif
1795
1796    memset(&self->bzs, 0, sizeof(bz_stream));
1797    bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1798    if (bzerror != BZ_OK) {
1799        Util_CatchBZ2Error(bzerror);
1800        goto error;
1801    }
1802
1803    self->running = 1;
1804
1805    return 0;
1806error:
1807#ifdef WITH_THREAD
1808    if (self->lock) {
1809        PyThread_free_lock(self->lock);
1810        self->lock = NULL;
1811    }
1812#endif
1813    return -1;
1814}
1815
1816static void
1817BZ2Comp_dealloc(BZ2CompObject *self)
1818{
1819#ifdef WITH_THREAD
1820    if (self->lock)
1821        PyThread_free_lock(self->lock);
1822#endif
1823    BZ2_bzCompressEnd(&self->bzs);
1824    Py_TYPE(self)->tp_free((PyObject *)self);
1825}
1826
1827
1828/* ===================================================================== */
1829/* BZ2Comp_Type definition. */
1830
/* Class docstring of bz2.BZ2Compressor. */
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object of bz2.BZ2Compressor.  It wires up the deallocator, the
 * method table and the initialiser; there are no member or getset tables
 * and no iterator slots.  Py_TPFLAGS_BASETYPE is set in tp_flags. */
static PyTypeObject BZ2Comp_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2Compressor",        /*tp_name*/
    sizeof(BZ2CompObject),      /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Comp__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Comp_methods,        /*tp_methods*/
    0,                      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Comp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1882
1883
1884/* ===================================================================== */
1885/* Members of BZ2Decomp. */
1886
/* OFF(x) is redefined to yield byte offsets inside BZ2DecompObject. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Struct fields of BZ2DecompObject exposed directly as attributes:
 * "unused_data" is an object reference flagged RO (read-only). */
static PyMemberDef BZ2Decomp_members[] = {
    {"unused_data", T_OBJECT, OFF(unused_data), RO},
    {NULL}      /* Sentinel */
};
1894
1895
1896/* ===================================================================== */
1897/* Methods of BZ2Decomp. */
1898
1899PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1900"decompress(data) -> string\n\
1901\n\
1902Provide more data to the decompressor object. It will return chunks\n\
1903of decompressed data whenever possible. If you try to decompress data\n\
1904after the end of stream is found, EOFError will be raised. If any data\n\
1905was found after the end of stream, it'll be ignored and saved in\n\
1906unused_data attribute.\n\
1907");
1908
1909static PyObject *
1910BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1911{
1912    Py_buffer pdata;
1913    char *data;
1914    int datasize;
1915    int bufsize = SMALLCHUNK;
1916    PY_LONG_LONG totalout;
1917    PyObject *ret = NULL;
1918    bz_stream *bzs = &self->bzs;
1919    int bzerror;
1920
1921    if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1922        return NULL;
1923    data = pdata.buf;
1924    datasize = pdata.len;
1925
1926    ACQUIRE_LOCK(self);
1927    if (!self->running) {
1928        PyErr_SetString(PyExc_EOFError, "end of stream was "
1929                                        "already found");
1930        goto error;
1931    }
1932
1933    ret = PyString_FromStringAndSize(NULL, bufsize);
1934    if (!ret)
1935        goto error;
1936
1937    bzs->next_in = data;
1938    bzs->avail_in = datasize;
1939    bzs->next_out = BUF(ret);
1940    bzs->avail_out = bufsize;
1941
1942    totalout = BZS_TOTAL_OUT(bzs);
1943
1944    for (;;) {
1945        Py_BEGIN_ALLOW_THREADS
1946        bzerror = BZ2_bzDecompress(bzs);
1947        Py_END_ALLOW_THREADS
1948        if (bzerror == BZ_STREAM_END) {
1949            if (bzs->avail_in != 0) {
1950                Py_DECREF(self->unused_data);
1951                self->unused_data =
1952                    PyString_FromStringAndSize(bzs->next_in,
1953                                               bzs->avail_in);
1954            }
1955            self->running = 0;
1956            break;
1957        }
1958        if (bzerror != BZ_OK) {
1959            Util_CatchBZ2Error(bzerror);
1960            goto error;
1961        }
1962        if (bzs->avail_in == 0)
1963            break; /* no more input data */
1964        if (bzs->avail_out == 0) {
1965            bufsize = Util_NewBufferSize(bufsize);
1966            if (_PyString_Resize(&ret, bufsize) < 0) {
1967                BZ2_bzDecompressEnd(bzs);
1968                goto error;
1969            }
1970            bzs->next_out = BUF(ret);
1971            bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1972                                        - totalout);
1973            bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1974        }
1975    }
1976
1977    if (bzs->avail_out != 0)
1978        _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1979
1980    RELEASE_LOCK(self);
1981    PyBuffer_Release(&pdata);
1982    return ret;
1983
1984error:
1985    RELEASE_LOCK(self);
1986    PyBuffer_Release(&pdata);
1987    Py_XDECREF(ret);
1988    return NULL;
1989}
1990
1991static PyMethodDef BZ2Decomp_methods[] = {
1992    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1993    {NULL,              NULL}           /* sentinel */
1994};
1995
1996
1997/* ===================================================================== */
1998/* Slot definitions for BZ2Decomp_Type. */
1999
2000static int
2001BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2002{
2003    int bzerror;
2004
2005    if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2006        return -1;
2007
2008#ifdef WITH_THREAD
2009    self->lock = PyThread_allocate_lock();
2010    if (!self->lock) {
2011        PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2012        goto error;
2013    }
2014#endif
2015
2016    self->unused_data = PyString_FromString("");
2017    if (!self->unused_data)
2018        goto error;
2019
2020    memset(&self->bzs, 0, sizeof(bz_stream));
2021    bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2022    if (bzerror != BZ_OK) {
2023        Util_CatchBZ2Error(bzerror);
2024        goto error;
2025    }
2026
2027    self->running = 1;
2028
2029    return 0;
2030
2031error:
2032#ifdef WITH_THREAD
2033    if (self->lock) {
2034        PyThread_free_lock(self->lock);
2035        self->lock = NULL;
2036    }
2037#endif
2038    Py_CLEAR(self->unused_data);
2039    return -1;
2040}
2041
2042static void
2043BZ2Decomp_dealloc(BZ2DecompObject *self)
2044{
2045#ifdef WITH_THREAD
2046    if (self->lock)
2047        PyThread_free_lock(self->lock);
2048#endif
2049    Py_XDECREF(self->unused_data);
2050    BZ2_bzDecompressEnd(&self->bzs);
2051    Py_TYPE(self)->tp_free((PyObject *)self);
2052}
2053
2054
2055/* ===================================================================== */
2056/* BZ2Decomp_Type definition. */
2057
2058PyDoc_STRVAR(BZ2Decomp__doc__,
2059"BZ2Decompressor() -> decompressor object\n\
2060\n\
2061Create a new decompressor object. This object may be used to decompress\n\
2062data sequentially. If you want to decompress data in one shot, use the\n\
2063decompress() function instead.\n\
2064");
2065
2066static PyTypeObject BZ2Decomp_Type = {
2067    PyVarObject_HEAD_INIT(NULL, 0)
2068    "bz2.BZ2Decompressor",      /*tp_name*/
2069    sizeof(BZ2DecompObject), /*tp_basicsize*/
2070    0,                          /*tp_itemsize*/
2071    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2072    0,                          /*tp_print*/
2073    0,                          /*tp_getattr*/
2074    0,                          /*tp_setattr*/
2075    0,                          /*tp_compare*/
2076    0,                          /*tp_repr*/
2077    0,                          /*tp_as_number*/
2078    0,                          /*tp_as_sequence*/
2079    0,                          /*tp_as_mapping*/
2080    0,                          /*tp_hash*/
2081    0,                      /*tp_call*/
2082    0,                      /*tp_str*/
2083    PyObject_GenericGetAttr,/*tp_getattro*/
2084    PyObject_GenericSetAttr,/*tp_setattro*/
2085    0,                      /*tp_as_buffer*/
2086    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2087    BZ2Decomp__doc__,       /*tp_doc*/
2088    0,                      /*tp_traverse*/
2089    0,                      /*tp_clear*/
2090    0,                      /*tp_richcompare*/
2091    0,                      /*tp_weaklistoffset*/
2092    0,                      /*tp_iter*/
2093    0,                      /*tp_iternext*/
2094    BZ2Decomp_methods,      /*tp_methods*/
2095    BZ2Decomp_members,      /*tp_members*/
2096    0,                      /*tp_getset*/
2097    0,                      /*tp_base*/
2098    0,                      /*tp_dict*/
2099    0,                      /*tp_descr_get*/
2100    0,                      /*tp_descr_set*/
2101    0,                      /*tp_dictoffset*/
2102    (initproc)BZ2Decomp_init, /*tp_init*/
2103    PyType_GenericAlloc,    /*tp_alloc*/
2104    PyType_GenericNew,      /*tp_new*/
2105    _PyObject_Del,          /*tp_free*/
2106    0,                      /*tp_is_gc*/
2107};
2108
2109
2110/* ===================================================================== */
2111/* Module functions. */
2112
2113PyDoc_STRVAR(bz2_compress__doc__,
2114"compress(data [, compresslevel=9]) -> string\n\
2115\n\
2116Compress data in one shot. If you want to compress data sequentially,\n\
2117use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2118given, must be a number between 1 and 9.\n\
2119");
2120
2121static PyObject *
2122bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2123{
2124    int compresslevel=9;
2125    Py_buffer pdata;
2126    char *data;
2127    int datasize;
2128    int bufsize;
2129    PyObject *ret = NULL;
2130    bz_stream _bzs;
2131    bz_stream *bzs = &_bzs;
2132    int bzerror;
2133    static char *kwlist[] = {"data", "compresslevel", 0};
2134
2135    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2136                                     kwlist, &pdata,
2137                                     &compresslevel))
2138        return NULL;
2139    data = pdata.buf;
2140    datasize = pdata.len;
2141
2142    if (compresslevel < 1 || compresslevel > 9) {
2143        PyErr_SetString(PyExc_ValueError,
2144                        "compresslevel must be between 1 and 9");
2145        PyBuffer_Release(&pdata);
2146        return NULL;
2147    }
2148
2149    /* Conforming to bz2 manual, this is large enough to fit compressed
2150     * data in one shot. We will check it later anyway. */
2151    bufsize = datasize + (datasize/100+1) + 600;
2152
2153    ret = PyString_FromStringAndSize(NULL, bufsize);
2154    if (!ret) {
2155        PyBuffer_Release(&pdata);
2156        return NULL;
2157    }
2158
2159    memset(bzs, 0, sizeof(bz_stream));
2160
2161    bzs->next_in = data;
2162    bzs->avail_in = datasize;
2163    bzs->next_out = BUF(ret);
2164    bzs->avail_out = bufsize;
2165
2166    bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2167    if (bzerror != BZ_OK) {
2168        Util_CatchBZ2Error(bzerror);
2169        PyBuffer_Release(&pdata);
2170        Py_DECREF(ret);
2171        return NULL;
2172    }
2173
2174    for (;;) {
2175        Py_BEGIN_ALLOW_THREADS
2176        bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2177        Py_END_ALLOW_THREADS
2178        if (bzerror == BZ_STREAM_END) {
2179            break;
2180        } else if (bzerror != BZ_FINISH_OK) {
2181            BZ2_bzCompressEnd(bzs);
2182            Util_CatchBZ2Error(bzerror);
2183            PyBuffer_Release(&pdata);
2184            Py_DECREF(ret);
2185            return NULL;
2186        }
2187        if (bzs->avail_out == 0) {
2188            bufsize = Util_NewBufferSize(bufsize);
2189            if (_PyString_Resize(&ret, bufsize) < 0) {
2190                BZ2_bzCompressEnd(bzs);
2191                PyBuffer_Release(&pdata);
2192                Py_DECREF(ret);
2193                return NULL;
2194            }
2195            bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2196            bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2197        }
2198    }
2199
2200    if (bzs->avail_out != 0)
2201        _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2202    BZ2_bzCompressEnd(bzs);
2203
2204    PyBuffer_Release(&pdata);
2205    return ret;
2206}
2207
2208PyDoc_STRVAR(bz2_decompress__doc__,
2209"decompress(data) -> decompressed data\n\
2210\n\
2211Decompress data in one shot. If you want to decompress data sequentially,\n\
2212use an instance of BZ2Decompressor instead.\n\
2213");
2214
2215static PyObject *
2216bz2_decompress(PyObject *self, PyObject *args)
2217{
2218    Py_buffer pdata;
2219    char *data;
2220    int datasize;
2221    int bufsize = SMALLCHUNK;
2222    PyObject *ret;
2223    bz_stream _bzs;
2224    bz_stream *bzs = &_bzs;
2225    int bzerror;
2226
2227    if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2228        return NULL;
2229    data = pdata.buf;
2230    datasize = pdata.len;
2231
2232    if (datasize == 0) {
2233        PyBuffer_Release(&pdata);
2234        return PyString_FromString("");
2235    }
2236
2237    ret = PyString_FromStringAndSize(NULL, bufsize);
2238    if (!ret) {
2239        PyBuffer_Release(&pdata);
2240        return NULL;
2241    }
2242
2243    memset(bzs, 0, sizeof(bz_stream));
2244
2245    bzs->next_in = data;
2246    bzs->avail_in = datasize;
2247    bzs->next_out = BUF(ret);
2248    bzs->avail_out = bufsize;
2249
2250    bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2251    if (bzerror != BZ_OK) {
2252        Util_CatchBZ2Error(bzerror);
2253        Py_DECREF(ret);
2254        PyBuffer_Release(&pdata);
2255        return NULL;
2256    }
2257
2258    for (;;) {
2259        Py_BEGIN_ALLOW_THREADS
2260        bzerror = BZ2_bzDecompress(bzs);
2261        Py_END_ALLOW_THREADS
2262        if (bzerror == BZ_STREAM_END) {
2263            break;
2264        } else if (bzerror != BZ_OK) {
2265            BZ2_bzDecompressEnd(bzs);
2266            Util_CatchBZ2Error(bzerror);
2267            PyBuffer_Release(&pdata);
2268            Py_DECREF(ret);
2269            return NULL;
2270        }
2271        if (bzs->avail_in == 0) {
2272            BZ2_bzDecompressEnd(bzs);
2273            PyErr_SetString(PyExc_ValueError,
2274                            "couldn't find end of stream");
2275            PyBuffer_Release(&pdata);
2276            Py_DECREF(ret);
2277            return NULL;
2278        }
2279        if (bzs->avail_out == 0) {
2280            bufsize = Util_NewBufferSize(bufsize);
2281            if (_PyString_Resize(&ret, bufsize) < 0) {
2282                BZ2_bzDecompressEnd(bzs);
2283                PyBuffer_Release(&pdata);
2284                Py_DECREF(ret);
2285                return NULL;
2286            }
2287            bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2288            bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2289        }
2290    }
2291
2292    if (bzs->avail_out != 0)
2293        _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2294    BZ2_bzDecompressEnd(bzs);
2295    PyBuffer_Release(&pdata);
2296
2297    return ret;
2298}
2299
2300static PyMethodDef bz2_methods[] = {
2301    {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2302        bz2_compress__doc__},
2303    {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2304        bz2_decompress__doc__},
2305    {NULL,              NULL}           /* sentinel */
2306};
2307
2308/* ===================================================================== */
2309/* Initialization function. */
2310
2311PyDoc_STRVAR(bz2__doc__,
2312"The python bz2 module provides a comprehensive interface for\n\
2313the bz2 compression library. It implements a complete file\n\
2314interface, one shot (de)compression functions, and types for\n\
2315sequential (de)compression.\n\
2316");
2317
2318PyMODINIT_FUNC
2319initbz2(void)
2320{
2321    PyObject *m;
2322
2323    if (PyType_Ready(&BZ2File_Type) < 0)
2324        return;
2325    if (PyType_Ready(&BZ2Comp_Type) < 0)
2326        return;
2327    if (PyType_Ready(&BZ2Decomp_Type) < 0)
2328        return;
2329
2330    m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2331    if (m == NULL)
2332        return;
2333
2334    PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2335
2336    Py_INCREF(&BZ2File_Type);
2337    PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2338
2339    Py_INCREF(&BZ2Comp_Type);
2340    PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2341
2342    Py_INCREF(&BZ2Decomp_Type);
2343    PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2344}
2345