bz2module.c revision 6ee6db81c2a1834f6cce864e0c84e1a166b15e95
1/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002  Python Software Foundation; All Rights Reserved
7
8*/
9
10#include <stdio.h>
11#include <bzlib.h>
12#include "Python.h"
13#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit string for the module.  Where it is exposed is not visible
 * in this part of the file (presumably the module init code -- confirm). */
static char __author__[] =
	"The bz2 python module was written by:\n"
	"\n"
	"    Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Raw character buffer of a Python string object. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

#define BZ2FileObject_Check(v)	((v)->ob_type == &BZ2File_Type)

/* Total output byte count of a bz_stream, combining the high and low
 * 32-bit halves when a 64-bit integer type is available.  The expansion
 * is a plain parenthesized expression (no trailing semicolon) and the
 * argument is parenthesized, so the macro can be used inside larger
 * expressions and with arguments such as `&stream`. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
	((bzs)->total_out_lo32)
#endif

/* Per-object lock helpers; they expand to nothing without thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

#ifdef WITH_UNIVERSAL_NEWLINES
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN	0	/* No newline seen, yet */
#define NEWLINE_CR 1		/* \r newline seen */
#define NEWLINE_LF 2		/* \n newline seen */
#define NEWLINE_CRLF 4		/* \r\n newline seen */
#endif
60
61/* ===================================================================== */
62/* Structure definitions. */
63
64typedef struct {
65	PyFileObject file;
66	BZFILE *fp;
67	int mode;
68	long pos;
69	long size;
70#ifdef WITH_THREAD
71	PyThread_type_lock lock;
72#endif
73} BZ2FileObject;
74
75typedef struct {
76	PyObject_HEAD
77	bz_stream bzs;
78	int running;
79#ifdef WITH_THREAD
80	PyThread_type_lock lock;
81#endif
82} BZ2CompObject;
83
84typedef struct {
85	PyObject_HEAD
86	bz_stream bzs;
87	int running;
88	PyObject *unused_data;
89#ifdef WITH_THREAD
90	PyThread_type_lock lock;
91#endif
92} BZ2DecompObject;
93
94/* ===================================================================== */
95/* Utility functions. */
96
97static int
98Util_CatchBZ2Error(int bzerror)
99{
100	int ret = 0;
101	switch(bzerror) {
102		case BZ_OK:
103		case BZ_STREAM_END:
104			break;
105
106		case BZ_CONFIG_ERROR:
107			PyErr_SetString(PyExc_SystemError,
108					"the bz2 library was not compiled "
109					"correctly");
110			ret = 1;
111			break;
112
113		case BZ_PARAM_ERROR:
114			PyErr_SetString(PyExc_ValueError,
115					"the bz2 library has received wrong "
116					"parameters");
117			ret = 1;
118			break;
119
120		case BZ_MEM_ERROR:
121			PyErr_NoMemory();
122			ret = 1;
123			break;
124
125		case BZ_DATA_ERROR:
126		case BZ_DATA_ERROR_MAGIC:
127			PyErr_SetString(PyExc_IOError, "invalid data stream");
128			ret = 1;
129			break;
130
131		case BZ_IO_ERROR:
132			PyErr_SetString(PyExc_IOError, "unknown IO error");
133			ret = 1;
134			break;
135
136		case BZ_UNEXPECTED_EOF:
137			PyErr_SetString(PyExc_EOFError,
138					"compressed file ended before the "
139					"logical end-of-stream was detected");
140			ret = 1;
141			break;
142
143		case BZ_SEQUENCE_ERROR:
144			PyErr_SetString(PyExc_RuntimeError,
145					"wrong sequence of bz2 library "
146					"commands used");
147			ret = 1;
148			break;
149	}
150	return ret;
151}
152
/* Growth step used while a buffer is still small: at least 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Threshold at which growth switches from doubling to linear steps. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow to:
 *   - up to SMALLCHUNK:       add SMALLCHUNK;
 *   - up to BIGCHUNK:         double;
 *   - beyond BIGCHUNK:        add BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	size_t step;

	if (currentsize <= SMALLCHUNK)
		step = SMALLCHUNK;
	else if (currentsize <= BIGCHUNK)
		step = currentsize;
	else
		step = BIGCHUNK;

	return currentsize + step;
}
179
180/* This is a hacked version of Python's fileobject.c:get_line(). */
181static PyObject *
182Util_GetLine(BZ2FileObject *self, int n)
183{
184	char c;
185	char *buf, *end;
186	size_t total_v_size;	/* total # of slots in buffer */
187	size_t used_v_size;	/* # used slots in buffer */
188	size_t increment;       /* amount to increment the buffer */
189	PyObject *v;
190	int bzerror;
191#ifdef WITH_UNIVERSAL_NEWLINES
192	int newlinetypes = ((PyFileObject*)self)->f_newlinetypes;
193	int skipnextlf = ((PyFileObject*)self)->f_skipnextlf;
194	int univ_newline = ((PyFileObject*)self)->f_univ_newline;
195#endif
196
197	total_v_size = n > 0 ? n : 100;
198	v = PyString_FromStringAndSize((char *)NULL, total_v_size);
199	if (v == NULL)
200		return NULL;
201
202	buf = BUF(v);
203	end = buf + total_v_size;
204
205	for (;;) {
206		Py_BEGIN_ALLOW_THREADS
207#ifdef WITH_UNIVERSAL_NEWLINES
208		if (univ_newline) {
209			while (1) {
210				BZ2_bzRead(&bzerror, self->fp, &c, 1);
211				self->pos++;
212				if (bzerror != BZ_OK || buf == end)
213					break;
214				if (skipnextlf) {
215					skipnextlf = 0;
216					if (c == '\n') {
217						/* Seeing a \n here with
218						 * skipnextlf true means we
219						 * saw a \r before.
220						 */
221						newlinetypes |= NEWLINE_CRLF;
222						BZ2_bzRead(&bzerror, self->fp,
223							   &c, 1);
224						if (bzerror != BZ_OK)
225							break;
226					} else {
227						newlinetypes |= NEWLINE_CR;
228					}
229				}
230				if (c == '\r') {
231					skipnextlf = 1;
232					c = '\n';
233				} else if ( c == '\n')
234					newlinetypes |= NEWLINE_LF;
235				*buf++ = c;
236				if (c == '\n') break;
237			}
238			if (bzerror == BZ_STREAM_END && skipnextlf)
239				newlinetypes |= NEWLINE_CR;
240		} else /* If not universal newlines use the normal loop */
241#endif
242			do {
243				BZ2_bzRead(&bzerror, self->fp, &c, 1);
244				self->pos++;
245				*buf++ = c;
246			} while (bzerror == BZ_OK && c != '\n' && buf != end);
247		Py_END_ALLOW_THREADS
248#ifdef WITH_UNIVERSAL_NEWLINES
249		((PyFileObject*)self)->f_newlinetypes = newlinetypes;
250		((PyFileObject*)self)->f_skipnextlf = skipnextlf;
251#endif
252		if (bzerror == BZ_STREAM_END) {
253			self->size = self->pos;
254			self->mode = MODE_READ_EOF;
255			break;
256		} else if (bzerror != BZ_OK) {
257			Util_CatchBZ2Error(bzerror);
258			Py_DECREF(v);
259			return NULL;
260		}
261		if (c == '\n')
262			break;
263		/* Must be because buf == end */
264		if (n > 0)
265			break;
266		used_v_size = total_v_size;
267		increment = total_v_size >> 2; /* mild exponential growth */
268		total_v_size += increment;
269		if (total_v_size > INT_MAX) {
270			PyErr_SetString(PyExc_OverflowError,
271			    "line is longer than a Python string can hold");
272			Py_DECREF(v);
273			return NULL;
274		}
275		if (_PyString_Resize(&v, total_v_size) < 0)
276			return NULL;
277		buf = BUF(v) + used_v_size;
278		end = BUF(v) + total_v_size;
279	}
280
281	used_v_size = buf - BUF(v);
282	if (used_v_size != total_v_size)
283		_PyString_Resize(&v, used_v_size);
284	return v;
285}
286
#ifndef WITH_UNIVERSAL_NEWLINES
#define Util_UnivNewlineRead(a,b,c,d,e) BZ2_bzRead(a,b,c,d)
#else
/* This is a hacked version of Python's
 * fileobject.c:Py_UniversalNewlineFread().
 *
 * Read up to n bytes from stream into buf.  When the file object has
 * universal newlines enabled, \r and \r\n are translated to \n and the
 * newline bookkeeping (f_newlinetypes, f_skipnextlf) is updated.
 *
 *   bzerror  receives the libbz2 status of the last read (BZ_OK when
 *            nothing had to be read because n is 0).
 *   fobj     file object carrying the newline state.
 *
 * Returns the number of bytes stored in buf.
 */
size_t
Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
		     char* buf, size_t n, BZ2FileObject *fobj)
{
	char *dst = buf;
	PyFileObject *f = (PyFileObject *)fobj;
	int newlinetypes, skipnextlf;

	assert(buf != NULL);
	assert(stream != NULL);

	if (!f->f_univ_newline)
		return BZ2_bzRead(bzerror, stream, buf, n);

	/* The loop below does not run for n == 0; callers still inspect
	 * the status, so give it a defined value. */
	*bzerror = BZ_OK;

	newlinetypes = f->f_newlinetypes;
	skipnextlf = f->f_skipnextlf;

	/* Invariant:  n is the number of bytes remaining to be filled
	 * in the buffer.
	 */
	while (n) {
		size_t nread;
		int shortread;
		char *src = dst;

		nread = BZ2_bzRead(bzerror, stream, dst, n);
		assert(nread <= n);
		n -= nread; /* assuming 1 byte out for each in; will adjust */
		shortread = n != 0;	/* true iff EOF or error */
		while (nread--) {
			char c = *src++;
			if (c == '\r') {
				/* Save as LF and set flag to skip next LF. */
				*dst++ = '\n';
				skipnextlf = 1;
			}
			else if (skipnextlf && c == '\n') {
				/* Skip LF, and remember we saw CR LF. */
				skipnextlf = 0;
				newlinetypes |= NEWLINE_CRLF;
				++n;
			}
			else {
				/* Normal char to be stored in buffer.  Also
				 * update the newlinetypes flag if either this
				 * is an LF or the previous char was a CR.
				 */
				if (c == '\n')
					newlinetypes |= NEWLINE_LF;
				else if (skipnextlf)
					newlinetypes |= NEWLINE_CR;
				*dst++ = c;
				skipnextlf = 0;
			}
		}
		/* Stop on any status other than BZ_OK, not only on a short
		 * read: a skipped LF makes n non-zero again, and reading
		 * once more after BZ_STREAM_END would replace the clean
		 * end-of-stream status with a sequence error. */
		if (shortread || *bzerror != BZ_OK) {
			/* If this is EOF, update type flags. */
			if (skipnextlf && *bzerror == BZ_STREAM_END)
				newlinetypes |= NEWLINE_CR;
			break;
		}
	}
	f->f_newlinetypes = newlinetypes;
	f->f_skipnextlf = skipnextlf;
	return dst - buf;
}
#endif
359
360/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
361static void
362Util_DropReadAhead(BZ2FileObject *self)
363{
364	PyFileObject *f = (PyFileObject*)self;
365	if (f->f_buf != NULL) {
366		PyMem_Free(f->f_buf);
367		f->f_buf = NULL;
368	}
369}
370
371/* This is a hacked version of Python's fileobject.c:readahead(). */
372static int
373Util_ReadAhead(BZ2FileObject *self, int bufsize)
374{
375	int chunksize;
376	int bzerror;
377	PyFileObject *f = (PyFileObject*)self;
378
379	if (f->f_buf != NULL) {
380		if((f->f_bufend - f->f_bufptr) >= 1)
381			return 0;
382		else
383			Util_DropReadAhead(self);
384	}
385	if (self->mode == MODE_READ_EOF) {
386		return -1;
387	}
388	if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
389		return -1;
390	}
391	Py_BEGIN_ALLOW_THREADS
392	chunksize = Util_UnivNewlineRead(&bzerror, self->fp, f->f_buf,
393					 bufsize, self);
394	Py_END_ALLOW_THREADS
395	self->pos += chunksize;
396	if (bzerror == BZ_STREAM_END) {
397		self->size = self->pos;
398		self->mode = MODE_READ_EOF;
399	} else if (bzerror != BZ_OK) {
400		Util_CatchBZ2Error(bzerror);
401		Util_DropReadAhead(self);
402		return -1;
403	}
404	f->f_bufptr = f->f_buf;
405	f->f_bufend = f->f_buf + chunksize;
406	return 0;
407}
408
409/* This is a hacked version of Python's
410 * fileobject.c:readahead_get_line_skip(). */
411static PyStringObject *
412Util_ReadAheadGetLineSkip(BZ2FileObject *bf, int skip, int bufsize)
413{
414	PyFileObject *f = (PyFileObject*)bf;
415	PyStringObject* s;
416	char *bufptr;
417	char *buf;
418	int len;
419
420	if (f->f_buf == NULL)
421		if (Util_ReadAhead(bf, bufsize) < 0)
422			return NULL;
423
424	len = f->f_bufend - f->f_bufptr;
425	if (len == 0)
426		return (PyStringObject *)
427			PyString_FromStringAndSize(NULL, skip);
428	bufptr = memchr(f->f_bufptr, '\n', len);
429	if (bufptr != NULL) {
430		bufptr++;			/* Count the '\n' */
431		len = bufptr - f->f_bufptr;
432		s = (PyStringObject *)
433			PyString_FromStringAndSize(NULL, skip+len);
434		if (s == NULL)
435			return NULL;
436		memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
437		f->f_bufptr = bufptr;
438		if (bufptr == f->f_bufend)
439			Util_DropReadAhead(bf);
440	} else {
441		bufptr = f->f_bufptr;
442		buf = f->f_buf;
443		f->f_buf = NULL; 	/* Force new readahead buffer */
444                s = Util_ReadAheadGetLineSkip(
445			bf, skip+len, bufsize + (bufsize>>2) );
446		if (s == NULL) {
447		        PyMem_Free(buf);
448			return NULL;
449		}
450		memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
451		PyMem_Free(buf);
452	}
453	return s;
454}
455
456/* ===================================================================== */
457/* Methods of BZ2File. */
458
459PyDoc_STRVAR(BZ2File_read__doc__,
460"read([size]) -> string\n\
461\n\
462Read at most size uncompressed bytes, returned as a string. If the size\n\
463argument is negative or omitted, read until EOF is reached.\n\
464");
465
466/* This is a hacked version of Python's fileobject.c:file_read(). */
467static PyObject *
468BZ2File_read(BZ2FileObject *self, PyObject *args)
469{
470	long bytesrequested = -1;
471	size_t bytesread, buffersize, chunksize;
472	int bzerror;
473	PyObject *ret = NULL;
474
475	if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
476		return NULL;
477
478	ACQUIRE_LOCK(self);
479	switch (self->mode) {
480		case MODE_READ:
481			break;
482		case MODE_READ_EOF:
483			ret = PyString_FromString("");
484			goto cleanup;
485		case MODE_CLOSED:
486			PyErr_SetString(PyExc_ValueError,
487					"I/O operation on closed file");
488			goto cleanup;
489		default:
490			PyErr_SetString(PyExc_IOError,
491					"file is not ready for reading");
492			goto cleanup;
493	}
494
495	if (bytesrequested < 0)
496		buffersize = Util_NewBufferSize((size_t)0);
497	else
498		buffersize = bytesrequested;
499	if (buffersize > INT_MAX) {
500		PyErr_SetString(PyExc_OverflowError,
501				"requested number of bytes is "
502				"more than a Python string can hold");
503		goto cleanup;
504	}
505	ret = PyString_FromStringAndSize((char *)NULL, buffersize);
506	if (ret == NULL)
507		goto cleanup;
508	bytesread = 0;
509
510	for (;;) {
511		Py_BEGIN_ALLOW_THREADS
512		chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
513						 BUF(ret)+bytesread,
514						 buffersize-bytesread,
515						 self);
516		self->pos += chunksize;
517		Py_END_ALLOW_THREADS
518		bytesread += chunksize;
519		if (bzerror == BZ_STREAM_END) {
520			self->size = self->pos;
521			self->mode = MODE_READ_EOF;
522			break;
523		} else if (bzerror != BZ_OK) {
524			Util_CatchBZ2Error(bzerror);
525			Py_DECREF(ret);
526			ret = NULL;
527			goto cleanup;
528		}
529		if (bytesrequested < 0) {
530			buffersize = Util_NewBufferSize(buffersize);
531			if (_PyString_Resize(&ret, buffersize) < 0)
532				goto cleanup;
533		} else {
534			break;
535		}
536	}
537	if (bytesread != buffersize)
538		_PyString_Resize(&ret, bytesread);
539
540cleanup:
541	RELEASE_LOCK(self);
542	return ret;
543}
544
545PyDoc_STRVAR(BZ2File_readline__doc__,
546"readline([size]) -> string\n\
547\n\
548Return the next line from the file, as a string, retaining newline.\n\
549A non-negative size argument will limit the maximum number of bytes to\n\
550return (an incomplete line may be returned then). Return an empty\n\
551string at EOF.\n\
552");
553
554static PyObject *
555BZ2File_readline(BZ2FileObject *self, PyObject *args)
556{
557	PyObject *ret = NULL;
558	int sizehint = -1;
559
560	if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
561		return NULL;
562
563	ACQUIRE_LOCK(self);
564	switch (self->mode) {
565		case MODE_READ:
566			break;
567		case MODE_READ_EOF:
568			ret = PyString_FromString("");
569			goto cleanup;
570		case MODE_CLOSED:
571			PyErr_SetString(PyExc_ValueError,
572					"I/O operation on closed file");
573			goto cleanup;
574		default:
575			PyErr_SetString(PyExc_IOError,
576					"file is not ready for reading");
577			goto cleanup;
578	}
579
580	if (sizehint == 0)
581		ret = PyString_FromString("");
582	else
583		ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
584
585cleanup:
586	RELEASE_LOCK(self);
587	return ret;
588}
589
590PyDoc_STRVAR(BZ2File_readlines__doc__,
591"readlines([size]) -> list\n\
592\n\
593Call readline() repeatedly and return a list of lines read.\n\
594The optional size argument, if given, is an approximate bound on the\n\
595total number of bytes in the lines returned.\n\
596");
597
598/* This is a hacked version of Python's fileobject.c:file_readlines(). */
599static PyObject *
600BZ2File_readlines(BZ2FileObject *self, PyObject *args)
601{
602	long sizehint = 0;
603	PyObject *list = NULL;
604	PyObject *line;
605	char small_buffer[SMALLCHUNK];
606	char *buffer = small_buffer;
607	size_t buffersize = SMALLCHUNK;
608	PyObject *big_buffer = NULL;
609	size_t nfilled = 0;
610	size_t nread;
611	size_t totalread = 0;
612	char *p, *q, *end;
613	int err;
614	int shortread = 0;
615	int bzerror;
616
617	if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
618		return NULL;
619
620	ACQUIRE_LOCK(self);
621	switch (self->mode) {
622		case MODE_READ:
623			break;
624		case MODE_READ_EOF:
625			list = PyList_New(0);
626			goto cleanup;
627		case MODE_CLOSED:
628			PyErr_SetString(PyExc_ValueError,
629					"I/O operation on closed file");
630			goto cleanup;
631		default:
632			PyErr_SetString(PyExc_IOError,
633					"file is not ready for reading");
634			goto cleanup;
635	}
636
637	if ((list = PyList_New(0)) == NULL)
638		goto cleanup;
639
640	for (;;) {
641		Py_BEGIN_ALLOW_THREADS
642		nread = Util_UnivNewlineRead(&bzerror, self->fp,
643					     buffer+nfilled,
644					     buffersize-nfilled, self);
645		self->pos += nread;
646		Py_END_ALLOW_THREADS
647		if (bzerror == BZ_STREAM_END) {
648			self->size = self->pos;
649			self->mode = MODE_READ_EOF;
650			if (nread == 0) {
651				sizehint = 0;
652				break;
653			}
654			shortread = 1;
655		} else if (bzerror != BZ_OK) {
656			Util_CatchBZ2Error(bzerror);
657		  error:
658			Py_DECREF(list);
659			list = NULL;
660			goto cleanup;
661		}
662		totalread += nread;
663		p = memchr(buffer+nfilled, '\n', nread);
664		if (p == NULL) {
665			/* Need a larger buffer to fit this line */
666			nfilled += nread;
667			buffersize *= 2;
668			if (buffersize > INT_MAX) {
669				PyErr_SetString(PyExc_OverflowError,
670			    "line is longer than a Python string can hold");
671				goto error;
672			}
673			if (big_buffer == NULL) {
674				/* Create the big buffer */
675				big_buffer = PyString_FromStringAndSize(
676					NULL, buffersize);
677				if (big_buffer == NULL)
678					goto error;
679				buffer = PyString_AS_STRING(big_buffer);
680				memcpy(buffer, small_buffer, nfilled);
681			}
682			else {
683				/* Grow the big buffer */
684				_PyString_Resize(&big_buffer, buffersize);
685				buffer = PyString_AS_STRING(big_buffer);
686			}
687			continue;
688		}
689		end = buffer+nfilled+nread;
690		q = buffer;
691		do {
692			/* Process complete lines */
693			p++;
694			line = PyString_FromStringAndSize(q, p-q);
695			if (line == NULL)
696				goto error;
697			err = PyList_Append(list, line);
698			Py_DECREF(line);
699			if (err != 0)
700				goto error;
701			q = p;
702			p = memchr(q, '\n', end-q);
703		} while (p != NULL);
704		/* Move the remaining incomplete line to the start */
705		nfilled = end-q;
706		memmove(buffer, q, nfilled);
707		if (sizehint > 0)
708			if (totalread >= (size_t)sizehint)
709				break;
710		if (shortread) {
711			sizehint = 0;
712			break;
713		}
714	}
715	if (nfilled != 0) {
716		/* Partial last line */
717		line = PyString_FromStringAndSize(buffer, nfilled);
718		if (line == NULL)
719			goto error;
720		if (sizehint > 0) {
721			/* Need to complete the last line */
722			PyObject *rest = Util_GetLine(self, 0);
723			if (rest == NULL) {
724				Py_DECREF(line);
725				goto error;
726			}
727			PyString_Concat(&line, rest);
728			Py_DECREF(rest);
729			if (line == NULL)
730				goto error;
731		}
732		err = PyList_Append(list, line);
733		Py_DECREF(line);
734		if (err != 0)
735			goto error;
736	}
737
738  cleanup:
739	RELEASE_LOCK(self);
740	if (big_buffer) {
741		Py_DECREF(big_buffer);
742	}
743	return list;
744}
745
746PyDoc_STRVAR(BZ2File_write__doc__,
747"write(data) -> None\n\
748\n\
749Write the 'data' string to file. Note that due to buffering, close() may\n\
750be needed before the file on disk reflects the data written.\n\
751");
752
753/* This is a hacked version of Python's fileobject.c:file_write(). */
754static PyObject *
755BZ2File_write(BZ2FileObject *self, PyObject *args)
756{
757	PyObject *ret = NULL;
758	char *buf;
759	int len;
760	int bzerror;
761
762	if (!PyArg_ParseTuple(args, "s#", &buf, &len))
763		return NULL;
764
765	ACQUIRE_LOCK(self);
766	switch (self->mode) {
767		case MODE_WRITE:
768			break;
769
770		case MODE_CLOSED:
771			PyErr_SetString(PyExc_ValueError,
772					"I/O operation on closed file");
773			goto cleanup;;
774
775		default:
776			PyErr_SetString(PyExc_IOError,
777					"file is not ready for writing");
778			goto cleanup;;
779	}
780
781	PyFile_SoftSpace((PyObject*)self, 0);
782
783	Py_BEGIN_ALLOW_THREADS
784	BZ2_bzWrite (&bzerror, self->fp, buf, len);
785	self->pos += len;
786	Py_END_ALLOW_THREADS
787
788	if (bzerror != BZ_OK) {
789		Util_CatchBZ2Error(bzerror);
790		goto cleanup;
791	}
792
793	Py_INCREF(Py_None);
794	ret = Py_None;
795
796cleanup:
797	RELEASE_LOCK(self);
798	return ret;
799}
800
801PyDoc_STRVAR(BZ2File_writelines__doc__,
802"writelines(sequence_of_strings) -> None\n\
803\n\
804Write the sequence of strings to the file. Note that newlines are not\n\
805added. The sequence can be any iterable object producing strings. This is\n\
806equivalent to calling write() for each string.\n\
807");
808
809/* This is a hacked version of Python's fileobject.c:file_writelines(). */
810static PyObject *
811BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
812{
813#define CHUNKSIZE 1000
814	PyObject *list = NULL;
815	PyObject *iter = NULL;
816	PyObject *ret = NULL;
817	PyObject *line;
818	int i, j, index, len, islist;
819	int bzerror;
820
821	ACQUIRE_LOCK(self);
822	islist = PyList_Check(seq);
823	if  (!islist) {
824		iter = PyObject_GetIter(seq);
825		if (iter == NULL) {
826			PyErr_SetString(PyExc_TypeError,
827				"writelines() requires an iterable argument");
828			goto error;
829		}
830		list = PyList_New(CHUNKSIZE);
831		if (list == NULL)
832			goto error;
833	}
834
835	/* Strategy: slurp CHUNKSIZE lines into a private list,
836	   checking that they are all strings, then write that list
837	   without holding the interpreter lock, then come back for more. */
838	for (index = 0; ; index += CHUNKSIZE) {
839		if (islist) {
840			Py_XDECREF(list);
841			list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
842			if (list == NULL)
843				goto error;
844			j = PyList_GET_SIZE(list);
845		}
846		else {
847			for (j = 0; j < CHUNKSIZE; j++) {
848				line = PyIter_Next(iter);
849				if (line == NULL) {
850					if (PyErr_Occurred())
851						goto error;
852					break;
853				}
854				PyList_SetItem(list, j, line);
855			}
856		}
857		if (j == 0)
858			break;
859
860		/* Check that all entries are indeed strings. If not,
861		   apply the same rules as for file.write() and
862		   convert the rets to strings. This is slow, but
863		   seems to be the only way since all conversion APIs
864		   could potentially execute Python code. */
865		for (i = 0; i < j; i++) {
866			PyObject *v = PyList_GET_ITEM(list, i);
867			if (!PyString_Check(v)) {
868			    	const char *buffer;
869			    	int len;
870				if (PyObject_AsCharBuffer(v, &buffer, &len)) {
871					PyErr_SetString(PyExc_TypeError,
872							"writelines() "
873							"argument must be "
874							"a sequence of "
875							"strings");
876					goto error;
877				}
878				line = PyString_FromStringAndSize(buffer,
879								  len);
880				if (line == NULL)
881					goto error;
882				Py_DECREF(v);
883				PyList_SET_ITEM(list, i, line);
884			}
885		}
886
887		PyFile_SoftSpace((PyObject*)self, 0);
888
889		/* Since we are releasing the global lock, the
890		   following code may *not* execute Python code. */
891		Py_BEGIN_ALLOW_THREADS
892		for (i = 0; i < j; i++) {
893		    	line = PyList_GET_ITEM(list, i);
894			len = PyString_GET_SIZE(line);
895			BZ2_bzWrite (&bzerror, self->fp,
896				     PyString_AS_STRING(line), len);
897			if (bzerror != BZ_OK) {
898				Py_BLOCK_THREADS
899				Util_CatchBZ2Error(bzerror);
900				goto error;
901			}
902		}
903		Py_END_ALLOW_THREADS
904
905		if (j < CHUNKSIZE)
906			break;
907	}
908
909	Py_INCREF(Py_None);
910	ret = Py_None;
911
912  error:
913	RELEASE_LOCK(self);
914	Py_XDECREF(list);
915  	Py_XDECREF(iter);
916	return ret;
917#undef CHUNKSIZE
918}
919
920PyDoc_STRVAR(BZ2File_seek__doc__,
921"seek(offset [, whence]) -> None\n\
922\n\
923Move to new file position. Argument offset is a byte count. Optional\n\
924argument whence defaults to 0 (offset from start of file, offset\n\
925should be >= 0); other values are 1 (move relative to current position,\n\
926positive or negative), and 2 (move relative to end of file, usually\n\
927negative, although many platforms allow seeking beyond the end of a file).\n\
928\n\
929Note that seeking of bz2 files is emulated, and depending on the parameters\n\
930the operation may be extremely slow.\n\
931");
932
933static PyObject *
934BZ2File_seek(BZ2FileObject *self, PyObject *args)
935{
936	int where = 0;
937	long offset;
938	char small_buffer[SMALLCHUNK];
939	char *buffer = small_buffer;
940	size_t buffersize = SMALLCHUNK;
941	int bytesread = 0;
942	int readsize;
943	int chunksize;
944	int bzerror;
945	int rewind = 0;
946	PyObject *func;
947	PyObject *ret = NULL;
948
949	if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where))
950		return NULL;
951
952	ACQUIRE_LOCK(self);
953	Util_DropReadAhead(self);
954	switch (self->mode) {
955		case MODE_READ:
956		case MODE_READ_EOF:
957			break;
958
959		case MODE_CLOSED:
960			PyErr_SetString(PyExc_ValueError,
961					"I/O operation on closed file");
962			goto cleanup;;
963
964		default:
965			PyErr_SetString(PyExc_IOError,
966					"seek works only while reading");
967			goto cleanup;;
968	}
969
970	if (offset < 0) {
971		if (where == 1) {
972			offset = self->pos + offset;
973			rewind = 1;
974		} else if (where == 2) {
975			if (self->size == -1) {
976				assert(self->mode != MODE_READ_EOF);
977				for (;;) {
978					Py_BEGIN_ALLOW_THREADS
979					chunksize = Util_UnivNewlineRead(
980							&bzerror, self->fp,
981							buffer, buffersize,
982							self);
983					self->pos += chunksize;
984					Py_END_ALLOW_THREADS
985
986					bytesread += chunksize;
987					if (bzerror == BZ_STREAM_END) {
988						break;
989					} else if (bzerror != BZ_OK) {
990						Util_CatchBZ2Error(bzerror);
991						goto cleanup;
992					}
993				}
994				self->mode = MODE_READ_EOF;
995				self->size = self->pos;
996				bytesread = 0;
997			}
998			offset = self->size + offset;
999			if (offset >= self->pos)
1000				offset -= self->pos;
1001			else
1002				rewind = 1;
1003		}
1004		if (offset < 0)
1005			offset = 0;
1006	} else if (where == 0) {
1007		if (offset >= self->pos)
1008			offset -= self->pos;
1009		else
1010			rewind = 1;
1011	}
1012
1013	if (rewind) {
1014		BZ2_bzReadClose(&bzerror, self->fp);
1015		func = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1016				     "seek");
1017		if (bzerror != BZ_OK) {
1018			Util_CatchBZ2Error(bzerror);
1019			goto cleanup;
1020		}
1021		if (!func) {
1022			PyErr_SetString(PyExc_RuntimeError,
1023					"can't find file.seek method");
1024			goto cleanup;
1025		}
1026		ret = PyObject_CallFunction(func, "(i)", 0);
1027		if (!ret)
1028			goto cleanup;
1029		Py_DECREF(ret);
1030		ret = NULL;
1031		self->pos = 0;
1032		self->fp = BZ2_bzReadOpen(&bzerror,
1033					  PyFile_AsFile((PyObject*)self),
1034					  0, 0, NULL, 0);
1035		if (bzerror != BZ_OK) {
1036			Util_CatchBZ2Error(bzerror);
1037			goto cleanup;
1038		}
1039		self->mode = MODE_READ;
1040	} else if (self->mode == MODE_READ_EOF) {
1041		goto exit;
1042	}
1043
1044	if (offset == 0)
1045		goto exit;
1046
1047	/* Before getting here, offset must be set to the number of bytes
1048	 * to walk forward. */
1049	for (;;) {
1050		if ((size_t)offset-bytesread > buffersize)
1051			readsize = buffersize;
1052		else
1053			readsize = offset-bytesread;
1054		Py_BEGIN_ALLOW_THREADS
1055		chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1056						 buffer, readsize, self);
1057		self->pos += chunksize;
1058		Py_END_ALLOW_THREADS
1059		bytesread += chunksize;
1060		if (bzerror == BZ_STREAM_END) {
1061			self->size = self->pos;
1062			self->mode = MODE_READ_EOF;
1063			break;
1064		} else if (bzerror != BZ_OK) {
1065			Util_CatchBZ2Error(bzerror);
1066			goto cleanup;
1067		}
1068		if (bytesread == offset)
1069			break;
1070	}
1071
1072exit:
1073	Py_INCREF(Py_None);
1074	ret = Py_None;
1075
1076cleanup:
1077	RELEASE_LOCK(self);
1078	return ret;
1079}
1080
1081PyDoc_STRVAR(BZ2File_tell__doc__,
1082"tell() -> int\n\
1083\n\
1084Return the current file position, an integer (may be a long integer).\n\
1085");
1086
1087static PyObject *
1088BZ2File_tell(BZ2FileObject *self, PyObject *args)
1089{
1090	PyObject *ret = NULL;
1091
1092	if (self->mode == MODE_CLOSED) {
1093		PyErr_SetString(PyExc_ValueError,
1094				"I/O operation on closed file");
1095		goto cleanup;
1096	}
1097
1098	ret = PyInt_FromLong(self->pos);
1099
1100cleanup:
1101	return ret;
1102}
1103
1104PyDoc_STRVAR(BZ2File_notsup__doc__,
1105"Operation not supported.\n\
1106");
1107
1108static PyObject *
1109BZ2File_notsup(BZ2FileObject *self, PyObject *args)
1110{
1111	PyErr_SetString(PyExc_IOError, "operation not supported");
1112	return NULL;
1113}
1114
1115PyDoc_STRVAR(BZ2File_close__doc__,
1116"close() -> None or (perhaps) an integer\n\
1117\n\
1118Close the file. Sets data attribute .closed to true. A closed file\n\
1119cannot be used for further I/O operations. close() may be called more\n\
1120than once without error.\n\
1121");
1122
1123static PyObject *
1124BZ2File_close(BZ2FileObject *self)
1125{
1126	PyObject *file_close;
1127	PyObject *ret = NULL;
1128	int bzerror = BZ_OK;
1129
1130	ACQUIRE_LOCK(self);
1131	switch (self->mode) {
1132		case MODE_READ:
1133		case MODE_READ_EOF:
1134			BZ2_bzReadClose(&bzerror, self->fp);
1135			break;
1136		case MODE_WRITE:
1137			BZ2_bzWriteClose(&bzerror, self->fp,
1138					 0, NULL, NULL);
1139			break;
1140	}
1141	self->mode = MODE_CLOSED;
1142	file_close = Py_FindMethod(PyFile_Type.tp_methods, (PyObject*)self,
1143				   "close");
1144	if (!file_close) {
1145		PyErr_SetString(PyExc_RuntimeError,
1146				"can't find file.close method");
1147		goto cleanup;
1148	}
1149	ret = PyObject_CallObject(file_close, NULL);
1150	if (bzerror != BZ_OK) {
1151		Util_CatchBZ2Error(bzerror);
1152		Py_XDECREF(ret);
1153		ret = NULL;
1154		goto cleanup;
1155	}
1156
1157cleanup:
1158	RELEASE_LOCK(self);
1159	return ret;
1160}
1161
1162static PyMethodDef BZ2File_methods[] = {
1163	{"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1164	{"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1165	{"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1166	{"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1167	{"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1168	{"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1169	{"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1170	{"truncate", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1171	{"readinto", (PyCFunction)BZ2File_notsup, METH_VARARGS, BZ2File_notsup__doc__},
1172	{"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1173	{NULL,		NULL}		/* sentinel */
1174};
1175
1176
1177/* ===================================================================== */
1178/* Slot definitions for BZ2File_Type. */
1179
1180static int
1181BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1182{
1183	PyObject *file_args = NULL;
1184	static char *kwlist[] = {"filename", "mode", "buffering",
1185				 "compresslevel", 0};
1186	char *name = NULL;
1187	char *mode = "r";
1188	int buffering = -1;
1189	int compresslevel = 9;
1190	int bzerror;
1191	int mode_char = 0;
1192	int univ_newline = 0;
1193
1194	self->size = -1;
1195
1196	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "et|sii:BZ2File",
1197					 kwlist, Py_FileSystemDefaultEncoding,
1198					 &name, &mode, &buffering,
1199					 &compresslevel))
1200		return -1;
1201
1202	if (compresslevel < 1 || compresslevel > 9) {
1203		PyErr_SetString(PyExc_ValueError,
1204				"compresslevel must be between 1 and 9");
1205		return -1;
1206	}
1207
1208	for (;;) {
1209		int error = 0;
1210		switch (*mode) {
1211			case 'r':
1212			case 'w':
1213				if (mode_char)
1214					error = 1;
1215				mode_char = *mode;
1216				break;
1217
1218			case 'b':
1219				break;
1220
1221			case 'U':
1222				univ_newline = 1;
1223				break;
1224
1225			default:
1226				error = 1;
1227				break;
1228		}
1229		if (error) {
1230			PyErr_Format(PyExc_ValueError,
1231				     "invalid mode char %c", *mode);
1232			return -1;
1233		}
1234		mode++;
1235		if (*mode == '\0')
1236			break;
1237	}
1238
1239	if (mode_char == 'r')
1240		mode = univ_newline ? "rbU" : "rb";
1241	else
1242		mode = univ_newline ? "wbU" : "wb";
1243
1244	file_args = Py_BuildValue("(ssi)", name, mode, buffering);
1245	if (!file_args)
1246		return -1;
1247
1248	/* From now on, we have stuff to dealloc, so jump to error label
1249	 * instead of returning */
1250
1251	if (PyFile_Type.tp_init((PyObject *)self, file_args, NULL) < 0)
1252		goto error;
1253
1254#ifdef WITH_THREAD
1255	self->lock = PyThread_allocate_lock();
1256	if (!self->lock)
1257		goto error;
1258#endif
1259
1260	if (mode_char == 'r')
1261		self->fp = BZ2_bzReadOpen(&bzerror,
1262					  PyFile_AsFile((PyObject*)self),
1263					  0, 0, NULL, 0);
1264	else
1265		self->fp = BZ2_bzWriteOpen(&bzerror,
1266					   PyFile_AsFile((PyObject*)self),
1267					   compresslevel, 0, 0);
1268
1269	if (bzerror != BZ_OK) {
1270		Util_CatchBZ2Error(bzerror);
1271		goto error;
1272	}
1273
1274	self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1275
1276	Py_XDECREF(file_args);
1277	PyMem_Free(name);
1278	return 0;
1279
1280error:
1281#ifdef WITH_THREAD
1282	if (self->lock)
1283		PyThread_free_lock(self->lock);
1284#endif
1285	Py_XDECREF(file_args);
1286	PyMem_Free(name);
1287	return -1;
1288}
1289
1290static void
1291BZ2File_dealloc(BZ2FileObject *self)
1292{
1293	int bzerror;
1294#ifdef WITH_THREAD
1295	if (self->lock)
1296		PyThread_free_lock(self->lock);
1297#endif
1298	switch (self->mode) {
1299		case MODE_READ:
1300		case MODE_READ_EOF:
1301			BZ2_bzReadClose(&bzerror, self->fp);
1302			break;
1303		case MODE_WRITE:
1304			BZ2_bzWriteClose(&bzerror, self->fp,
1305					 0, NULL, NULL);
1306			break;
1307	}
1308	Util_DropReadAhead(self);
1309	((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1310}
1311
1312/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1313static PyObject *
1314BZ2File_getiter(BZ2FileObject *self)
1315{
1316	if (self->mode == MODE_CLOSED) {
1317		PyErr_SetString(PyExc_ValueError,
1318				"I/O operation on closed file");
1319		return NULL;
1320	}
1321	Py_INCREF((PyObject*)self);
1322	return (PyObject *)self;
1323}
1324
1325/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1326#define READAHEAD_BUFSIZE 8192
1327static PyObject *
1328BZ2File_iternext(BZ2FileObject *self)
1329{
1330	PyStringObject* ret;
1331	ACQUIRE_LOCK(self);
1332	if (self->mode == MODE_CLOSED) {
1333		PyErr_SetString(PyExc_ValueError,
1334				"I/O operation on closed file");
1335		return NULL;
1336	}
1337	ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1338	RELEASE_LOCK(self);
1339	if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1340		Py_XDECREF(ret);
1341		return NULL;
1342	}
1343	return (PyObject *)ret;
1344}
1345
1346/* ===================================================================== */
1347/* BZ2File_Type definition. */
1348
1349PyDoc_VAR(BZ2File__doc__) =
1350PyDoc_STR(
1351"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1352\n\
1353Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1354writing. When opened for writing, the file will be created if it doesn't\n\
1355exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1356unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1357is given, must be a number between 1 and 9.\n\
1358")
1359#ifdef WITH_UNIVERSAL_NEWLINES
1360PyDoc_STR(
1361"\n\
1362Add a 'U' to mode to open the file for input with universal newline\n\
1363support. Any line ending in the input file will be seen as a '\\n' in\n\
1364Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1365for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1366'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1367newlines are available only when reading.\n\
1368")
1369#endif
1370;
1371
1372static PyTypeObject BZ2File_Type = {
1373	PyObject_HEAD_INIT(NULL)
1374	0,			/*ob_size*/
1375	"bz2.BZ2File",		/*tp_name*/
1376	sizeof(BZ2FileObject),	/*tp_basicsize*/
1377	0,			/*tp_itemsize*/
1378	(destructor)BZ2File_dealloc, /*tp_dealloc*/
1379	0,			/*tp_print*/
1380	0,			/*tp_getattr*/
1381	0,			/*tp_setattr*/
1382	0,			/*tp_compare*/
1383	0,			/*tp_repr*/
1384	0,			/*tp_as_number*/
1385	0,			/*tp_as_sequence*/
1386	0,			/*tp_as_mapping*/
1387	0,			/*tp_hash*/
1388        0,                      /*tp_call*/
1389        0,                      /*tp_str*/
1390        PyObject_GenericGetAttr,/*tp_getattro*/
1391        PyObject_GenericSetAttr,/*tp_setattro*/
1392        0,                      /*tp_as_buffer*/
1393        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1394        BZ2File__doc__,         /*tp_doc*/
1395        0,                      /*tp_traverse*/
1396        0,                      /*tp_clear*/
1397        0,                      /*tp_richcompare*/
1398        0,                      /*tp_weaklistoffset*/
1399        (getiterfunc)BZ2File_getiter, /*tp_iter*/
1400        (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1401        BZ2File_methods,        /*tp_methods*/
1402        0,                      /*tp_members*/
1403        0,                      /*tp_getset*/
1404        0,                      /*tp_base*/
1405        0,                      /*tp_dict*/
1406        0,                      /*tp_descr_get*/
1407        0,                      /*tp_descr_set*/
1408        0,                      /*tp_dictoffset*/
1409        (initproc)BZ2File_init, /*tp_init*/
1410        PyType_GenericAlloc,    /*tp_alloc*/
1411        0,                      /*tp_new*/
1412      	_PyObject_Del,          /*tp_free*/
1413        0,                      /*tp_is_gc*/
1414};
1415
1416
1417/* ===================================================================== */
1418/* Methods of BZ2Comp. */
1419
1420PyDoc_STRVAR(BZ2Comp_compress__doc__,
1421"compress(data) -> string\n\
1422\n\
1423Provide more data to the compressor object. It will return chunks of\n\
1424compressed data whenever possible. When you've finished providing data\n\
1425to compress, call the flush() method to finish the compression process,\n\
1426and return what is left in the internal buffers.\n\
1427");
1428
1429static PyObject *
1430BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1431{
1432	char *data;
1433	int datasize;
1434	int bufsize = SMALLCHUNK;
1435	LONG_LONG totalout;
1436	PyObject *ret = NULL;
1437	bz_stream *bzs = &self->bzs;
1438	int bzerror;
1439
1440	if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1441		return NULL;
1442
1443	ACQUIRE_LOCK(self);
1444	if (!self->running) {
1445		PyErr_SetString(PyExc_ValueError,
1446				"this object was already flushed");
1447		goto error;
1448	}
1449
1450	ret = PyString_FromStringAndSize(NULL, bufsize);
1451	if (!ret)
1452		goto error;
1453
1454	bzs->next_in = data;
1455	bzs->avail_in = datasize;
1456	bzs->next_out = BUF(ret);
1457	bzs->avail_out = bufsize;
1458
1459	totalout = BZS_TOTAL_OUT(bzs);
1460
1461	for (;;) {
1462		Py_BEGIN_ALLOW_THREADS
1463		bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1464		Py_END_ALLOW_THREADS
1465		if (bzerror != BZ_RUN_OK) {
1466			Util_CatchBZ2Error(bzerror);
1467			goto error;
1468		}
1469		if (bzs->avail_out == 0) {
1470			bufsize = Util_NewBufferSize(bufsize);
1471			if (_PyString_Resize(&ret, bufsize) < 0) {
1472				BZ2_bzCompressEnd(bzs);
1473				goto error;
1474			}
1475			bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1476						    - totalout);
1477			bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1478		} else if (bzs->avail_in == 0) {
1479			break;
1480		}
1481	}
1482
1483	_PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1484
1485	RELEASE_LOCK(self);
1486	return ret;
1487
1488error:
1489	RELEASE_LOCK(self);
1490	Py_XDECREF(ret);
1491	return NULL;
1492}
1493
1494PyDoc_STRVAR(BZ2Comp_flush__doc__,
1495"flush() -> string\n\
1496\n\
1497Finish the compression process and return what is left in internal buffers.\n\
1498You must not use the compressor object after calling this method.\n\
1499");
1500
1501static PyObject *
1502BZ2Comp_flush(BZ2CompObject *self)
1503{
1504	int bufsize = SMALLCHUNK;
1505	PyObject *ret = NULL;
1506	bz_stream *bzs = &self->bzs;
1507	LONG_LONG totalout;
1508	int bzerror;
1509
1510	ACQUIRE_LOCK(self);
1511	if (!self->running) {
1512		PyErr_SetString(PyExc_ValueError, "object was already "
1513						  "flushed");
1514		goto error;
1515	}
1516	self->running = 0;
1517
1518	ret = PyString_FromStringAndSize(NULL, bufsize);
1519	if (!ret)
1520		goto error;
1521
1522	bzs->next_out = BUF(ret);
1523	bzs->avail_out = bufsize;
1524
1525	totalout = BZS_TOTAL_OUT(bzs);
1526
1527	for (;;) {
1528		Py_BEGIN_ALLOW_THREADS
1529		bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1530		Py_END_ALLOW_THREADS
1531		if (bzerror == BZ_STREAM_END) {
1532			break;
1533		} else if (bzerror != BZ_FINISH_OK) {
1534			Util_CatchBZ2Error(bzerror);
1535			goto error;
1536		}
1537		if (bzs->avail_out == 0) {
1538			bufsize = Util_NewBufferSize(bufsize);
1539			if (_PyString_Resize(&ret, bufsize) < 0)
1540				goto error;
1541			bzs->next_out = BUF(ret);
1542			bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1543						    - totalout);
1544			bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1545		}
1546	}
1547
1548	if (bzs->avail_out != 0)
1549		_PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1550
1551	RELEASE_LOCK(self);
1552	return ret;
1553
1554error:
1555	RELEASE_LOCK(self);
1556	Py_XDECREF(ret);
1557	return NULL;
1558}
1559
1560static PyMethodDef BZ2Comp_methods[] = {
1561	{"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1562	 BZ2Comp_compress__doc__},
1563	{"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1564	 BZ2Comp_flush__doc__},
1565	{NULL,		NULL}		/* sentinel */
1566};
1567
1568
1569/* ===================================================================== */
1570/* Slot definitions for BZ2Comp_Type. */
1571
1572static int
1573BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1574{
1575	int compresslevel = 9;
1576	int bzerror;
1577	static char *kwlist[] = {"compresslevel", 0};
1578
1579	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1580					 kwlist, &compresslevel))
1581		return -1;
1582
1583	if (compresslevel < 1 || compresslevel > 9) {
1584		PyErr_SetString(PyExc_ValueError,
1585				"compresslevel must be between 1 and 9");
1586		goto error;
1587	}
1588
1589#ifdef WITH_THREAD
1590	self->lock = PyThread_allocate_lock();
1591	if (!self->lock)
1592		goto error;
1593#endif
1594
1595	memset(&self->bzs, 0, sizeof(bz_stream));
1596	bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1597	if (bzerror != BZ_OK) {
1598		Util_CatchBZ2Error(bzerror);
1599		goto error;
1600	}
1601
1602	self->running = 1;
1603
1604	return 0;
1605error:
1606#ifdef WITH_THREAD
1607	if (self->lock)
1608		PyThread_free_lock(self->lock);
1609#endif
1610	return -1;
1611}
1612
1613static void
1614BZ2Comp_dealloc(BZ2CompObject *self)
1615{
1616#ifdef WITH_THREAD
1617	if (self->lock)
1618		PyThread_free_lock(self->lock);
1619#endif
1620	BZ2_bzCompressEnd(&self->bzs);
1621	((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1622}
1623
1624
1625/* ===================================================================== */
1626/* BZ2Comp_Type definition. */
1627
1628PyDoc_STRVAR(BZ2Comp__doc__,
1629"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1630\n\
1631Create a new compressor object. This object may be used to compress\n\
1632data sequentially. If you want to compress data in one shot, use the\n\
1633compress() function instead. The compresslevel parameter, if given,\n\
1634must be a number between 1 and 9.\n\
1635");
1636
1637static PyTypeObject BZ2Comp_Type = {
1638	PyObject_HEAD_INIT(NULL)
1639	0,			/*ob_size*/
1640	"bz2.BZ2Compressor",	/*tp_name*/
1641	sizeof(BZ2CompObject),	/*tp_basicsize*/
1642	0,			/*tp_itemsize*/
1643	(destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1644	0,			/*tp_print*/
1645	0,			/*tp_getattr*/
1646	0,			/*tp_setattr*/
1647	0,			/*tp_compare*/
1648	0,			/*tp_repr*/
1649	0,			/*tp_as_number*/
1650	0,			/*tp_as_sequence*/
1651	0,			/*tp_as_mapping*/
1652	0,			/*tp_hash*/
1653        0,                      /*tp_call*/
1654        0,                      /*tp_str*/
1655        PyObject_GenericGetAttr,/*tp_getattro*/
1656        PyObject_GenericSetAttr,/*tp_setattro*/
1657        0,                      /*tp_as_buffer*/
1658        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1659        BZ2Comp__doc__,         /*tp_doc*/
1660        0,                      /*tp_traverse*/
1661        0,                      /*tp_clear*/
1662        0,                      /*tp_richcompare*/
1663        0,                      /*tp_weaklistoffset*/
1664        0,                      /*tp_iter*/
1665        0,                      /*tp_iternext*/
1666        BZ2Comp_methods,        /*tp_methods*/
1667        0,                      /*tp_members*/
1668        0,                      /*tp_getset*/
1669        0,                      /*tp_base*/
1670        0,                      /*tp_dict*/
1671        0,                      /*tp_descr_get*/
1672        0,                      /*tp_descr_set*/
1673        0,                      /*tp_dictoffset*/
1674        (initproc)BZ2Comp_init, /*tp_init*/
1675        PyType_GenericAlloc,    /*tp_alloc*/
1676        PyType_GenericNew,      /*tp_new*/
1677      	_PyObject_Del,          /*tp_free*/
1678        0,                      /*tp_is_gc*/
1679};
1680
1681
1682/* ===================================================================== */
1683/* Members of BZ2Decomp. */
1684
1685#define OFF(x) offsetof(BZ2DecompObject, x)
1686
1687static PyMemberDef BZ2Decomp_members[] = {
1688	{"unused_data", T_OBJECT, OFF(unused_data), RO},
1689	{NULL}	/* Sentinel */
1690};
1691
1692
1693/* ===================================================================== */
1694/* Methods of BZ2Decomp. */
1695
1696PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1697"decompress(data) -> string\n\
1698\n\
1699Provide more data to the decompressor object. It will return chunks\n\
1700of decompressed data whenever possible. If you try to decompress data\n\
1701after the end of stream is found, EOFError will be raised. If any data\n\
1702was found after the end of stream, it'll be ignored and saved in\n\
1703unused_data attribute.\n\
1704");
1705
1706static PyObject *
1707BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1708{
1709	char *data;
1710	int datasize;
1711	int bufsize = SMALLCHUNK;
1712	LONG_LONG totalout;
1713	PyObject *ret = NULL;
1714	bz_stream *bzs = &self->bzs;
1715	int bzerror;
1716
1717	if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
1718		return NULL;
1719
1720	ACQUIRE_LOCK(self);
1721	if (!self->running) {
1722		PyErr_SetString(PyExc_EOFError, "end of stream was "
1723						"already found");
1724		goto error;
1725	}
1726
1727	ret = PyString_FromStringAndSize(NULL, bufsize);
1728	if (!ret)
1729		goto error;
1730
1731	bzs->next_in = data;
1732	bzs->avail_in = datasize;
1733	bzs->next_out = BUF(ret);
1734	bzs->avail_out = bufsize;
1735
1736	totalout = BZS_TOTAL_OUT(bzs);
1737
1738	for (;;) {
1739		Py_BEGIN_ALLOW_THREADS
1740		bzerror = BZ2_bzDecompress(bzs);
1741		Py_END_ALLOW_THREADS
1742		if (bzerror == BZ_STREAM_END) {
1743			if (bzs->avail_in != 0) {
1744				Py_DECREF(self->unused_data);
1745				self->unused_data =
1746				    PyString_FromStringAndSize(bzs->next_in,
1747							       bzs->avail_in);
1748			}
1749			self->running = 0;
1750			break;
1751		}
1752		if (bzerror != BZ_OK) {
1753			Util_CatchBZ2Error(bzerror);
1754			goto error;
1755		}
1756		if (bzs->avail_out == 0) {
1757			bufsize = Util_NewBufferSize(bufsize);
1758			if (_PyString_Resize(&ret, bufsize) < 0) {
1759				BZ2_bzDecompressEnd(bzs);
1760				goto error;
1761			}
1762			bzs->next_out = BUF(ret);
1763			bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1764						    - totalout);
1765			bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1766		} else if (bzs->avail_in == 0) {
1767			break;
1768		}
1769	}
1770
1771	if (bzs->avail_out != 0)
1772		_PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1773
1774	RELEASE_LOCK(self);
1775	return ret;
1776
1777error:
1778	RELEASE_LOCK(self);
1779	Py_XDECREF(ret);
1780	return NULL;
1781}
1782
1783static PyMethodDef BZ2Decomp_methods[] = {
1784	{"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1785	{NULL,		NULL}		/* sentinel */
1786};
1787
1788
1789/* ===================================================================== */
1790/* Slot definitions for BZ2Decomp_Type. */
1791
1792static int
1793BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1794{
1795	int bzerror;
1796
1797	if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1798		return -1;
1799
1800#ifdef WITH_THREAD
1801	self->lock = PyThread_allocate_lock();
1802	if (!self->lock)
1803		goto error;
1804#endif
1805
1806	self->unused_data = PyString_FromString("");
1807	if (!self->unused_data)
1808		goto error;
1809
1810	memset(&self->bzs, 0, sizeof(bz_stream));
1811	bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1812	if (bzerror != BZ_OK) {
1813		Util_CatchBZ2Error(bzerror);
1814		goto error;
1815	}
1816
1817	self->running = 1;
1818
1819	return 0;
1820
1821error:
1822#ifdef WITH_THREAD
1823	if (self->lock)
1824		PyThread_free_lock(self->lock);
1825#endif
1826	Py_XDECREF(self->unused_data);
1827	return -1;
1828}
1829
1830static void
1831BZ2Decomp_dealloc(BZ2DecompObject *self)
1832{
1833#ifdef WITH_THREAD
1834	if (self->lock)
1835		PyThread_free_lock(self->lock);
1836#endif
1837	Py_XDECREF(self->unused_data);
1838	BZ2_bzDecompressEnd(&self->bzs);
1839	((PyObject*)self)->ob_type->tp_free((PyObject *)self);
1840}
1841
1842
1843/* ===================================================================== */
1844/* BZ2Decomp_Type definition. */
1845
1846PyDoc_STRVAR(BZ2Decomp__doc__,
1847"BZ2Decompressor() -> decompressor object\n\
1848\n\
1849Create a new decompressor object. This object may be used to decompress\n\
1850data sequentially. If you want to decompress data in one shot, use the\n\
1851decompress() function instead.\n\
1852");
1853
1854static PyTypeObject BZ2Decomp_Type = {
1855	PyObject_HEAD_INIT(NULL)
1856	0,			/*ob_size*/
1857	"bz2.BZ2Decompressor",	/*tp_name*/
1858	sizeof(BZ2DecompObject), /*tp_basicsize*/
1859	0,			/*tp_itemsize*/
1860	(destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1861	0,			/*tp_print*/
1862	0,			/*tp_getattr*/
1863	0,			/*tp_setattr*/
1864	0,			/*tp_compare*/
1865	0,			/*tp_repr*/
1866	0,			/*tp_as_number*/
1867	0,			/*tp_as_sequence*/
1868	0,			/*tp_as_mapping*/
1869	0,			/*tp_hash*/
1870        0,                      /*tp_call*/
1871        0,                      /*tp_str*/
1872        PyObject_GenericGetAttr,/*tp_getattro*/
1873        PyObject_GenericSetAttr,/*tp_setattro*/
1874        0,                      /*tp_as_buffer*/
1875        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1876        BZ2Decomp__doc__,       /*tp_doc*/
1877        0,                      /*tp_traverse*/
1878        0,                      /*tp_clear*/
1879        0,                      /*tp_richcompare*/
1880        0,                      /*tp_weaklistoffset*/
1881        0,                      /*tp_iter*/
1882        0,                      /*tp_iternext*/
1883        BZ2Decomp_methods,      /*tp_methods*/
1884        BZ2Decomp_members,      /*tp_members*/
1885        0,                      /*tp_getset*/
1886        0,                      /*tp_base*/
1887        0,                      /*tp_dict*/
1888        0,                      /*tp_descr_get*/
1889        0,                      /*tp_descr_set*/
1890        0,                      /*tp_dictoffset*/
1891        (initproc)BZ2Decomp_init, /*tp_init*/
1892        PyType_GenericAlloc,    /*tp_alloc*/
1893        PyType_GenericNew,      /*tp_new*/
1894      	_PyObject_Del,          /*tp_free*/
1895        0,                      /*tp_is_gc*/
1896};
1897
1898
1899/* ===================================================================== */
1900/* Module functions. */
1901
1902PyDoc_STRVAR(bz2_compress__doc__,
1903"compress(data [, compresslevel=9]) -> string\n\
1904\n\
1905Compress data in one shot. If you want to compress data sequentially,\n\
1906use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1907given, must be a number between 1 and 9.\n\
1908");
1909
1910static PyObject *
1911bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1912{
1913	int compresslevel=9;
1914	char *data;
1915	int datasize;
1916	int bufsize;
1917	PyObject *ret = NULL;
1918	bz_stream _bzs;
1919	bz_stream *bzs = &_bzs;
1920	int bzerror;
1921	static char *kwlist[] = {"data", "compresslevel", 0};
1922
1923	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
1924					 kwlist, &data, &datasize,
1925					 &compresslevel))
1926		return NULL;
1927
1928	if (compresslevel < 1 || compresslevel > 9) {
1929		PyErr_SetString(PyExc_ValueError,
1930				"compresslevel must be between 1 and 9");
1931		return NULL;
1932	}
1933
1934	/* Conforming to bz2 manual, this is large enough to fit compressed
1935	 * data in one shot. We will check it later anyway. */
1936	bufsize = datasize + (datasize/100+1) + 600;
1937
1938	ret = PyString_FromStringAndSize(NULL, bufsize);
1939	if (!ret)
1940		return NULL;
1941
1942	memset(bzs, 0, sizeof(bz_stream));
1943
1944	bzs->next_in = data;
1945	bzs->avail_in = datasize;
1946	bzs->next_out = BUF(ret);
1947	bzs->avail_out = bufsize;
1948
1949	bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1950	if (bzerror != BZ_OK) {
1951		Util_CatchBZ2Error(bzerror);
1952		Py_DECREF(ret);
1953		return NULL;
1954	}
1955
1956	for (;;) {
1957		Py_BEGIN_ALLOW_THREADS
1958		bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1959		Py_END_ALLOW_THREADS
1960		if (bzerror == BZ_STREAM_END) {
1961			break;
1962		} else if (bzerror != BZ_FINISH_OK) {
1963			BZ2_bzCompressEnd(bzs);
1964			Util_CatchBZ2Error(bzerror);
1965			Py_DECREF(ret);
1966			return NULL;
1967		}
1968		if (bzs->avail_out == 0) {
1969			bufsize = Util_NewBufferSize(bufsize);
1970			if (_PyString_Resize(&ret, bufsize) < 0) {
1971				BZ2_bzCompressEnd(bzs);
1972				Py_DECREF(ret);
1973				return NULL;
1974			}
1975			bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1976			bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1977		}
1978	}
1979
1980	if (bzs->avail_out != 0)
1981		_PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
1982	BZ2_bzCompressEnd(bzs);
1983
1984	return ret;
1985}
1986
1987PyDoc_STRVAR(bz2_decompress__doc__,
1988"decompress(data) -> decompressed data\n\
1989\n\
1990Decompress data in one shot. If you want to decompress data sequentially,\n\
1991use an instance of BZ2Decompressor instead.\n\
1992");
1993
1994static PyObject *
1995bz2_decompress(PyObject *self, PyObject *args)
1996{
1997	char *data;
1998	int datasize;
1999	int bufsize = SMALLCHUNK;
2000	PyObject *ret;
2001	bz_stream _bzs;
2002	bz_stream *bzs = &_bzs;
2003	int bzerror;
2004
2005	if (!PyArg_ParseTuple(args, "s#", &data, &datasize))
2006		return NULL;
2007
2008	if (datasize == 0)
2009		return PyString_FromString("");
2010
2011	ret = PyString_FromStringAndSize(NULL, bufsize);
2012	if (!ret)
2013		return NULL;
2014
2015	memset(bzs, 0, sizeof(bz_stream));
2016
2017	bzs->next_in = data;
2018	bzs->avail_in = datasize;
2019	bzs->next_out = BUF(ret);
2020	bzs->avail_out = bufsize;
2021
2022	bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2023	if (bzerror != BZ_OK) {
2024		Util_CatchBZ2Error(bzerror);
2025		Py_DECREF(ret);
2026		return NULL;
2027	}
2028
2029	for (;;) {
2030		Py_BEGIN_ALLOW_THREADS
2031		bzerror = BZ2_bzDecompress(bzs);
2032		Py_END_ALLOW_THREADS
2033		if (bzerror == BZ_STREAM_END) {
2034			break;
2035		} else if (bzerror != BZ_OK) {
2036			BZ2_bzDecompressEnd(bzs);
2037			Util_CatchBZ2Error(bzerror);
2038			Py_DECREF(ret);
2039			return NULL;
2040		}
2041		if (bzs->avail_out == 0) {
2042			bufsize = Util_NewBufferSize(bufsize);
2043			if (_PyString_Resize(&ret, bufsize) < 0) {
2044				BZ2_bzDecompressEnd(bzs);
2045				Py_DECREF(ret);
2046				return NULL;
2047			}
2048			bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2049			bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2050		} else if (bzs->avail_in == 0) {
2051			BZ2_bzDecompressEnd(bzs);
2052			PyErr_SetString(PyExc_ValueError,
2053					"couldn't find end of stream");
2054			Py_DECREF(ret);
2055			return NULL;
2056		}
2057	}
2058
2059	if (bzs->avail_out != 0)
2060		_PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2061	BZ2_bzDecompressEnd(bzs);
2062
2063	return ret;
2064}
2065
2066static PyMethodDef bz2_methods[] = {
2067	{"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2068		bz2_compress__doc__},
2069	{"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2070		bz2_decompress__doc__},
2071	{NULL,		NULL}		/* sentinel */
2072};
2073
2074/* ===================================================================== */
2075/* Initialization function. */
2076
2077PyDoc_STRVAR(bz2__doc__,
2078"The python bz2 module provides a comprehensive interface for\n\
2079the bz2 compression library. It implements a complete file\n\
2080interface, one shot (de)compression functions, and types for\n\
2081sequential (de)compression.\n\
2082");
2083
2084DL_EXPORT(void)
2085initbz2(void)
2086{
2087	PyObject *m;
2088
2089	BZ2File_Type.ob_type = &PyType_Type;
2090	BZ2File_Type.tp_base = &PyFile_Type;
2091	BZ2File_Type.tp_new = PyFile_Type.tp_new;
2092
2093	BZ2Comp_Type.ob_type = &PyType_Type;
2094	BZ2Decomp_Type.ob_type = &PyType_Type;
2095
2096	m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2097
2098	PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2099
2100	Py_INCREF(&BZ2File_Type);
2101	PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2102
2103	Py_INCREF(&BZ2Comp_Type);
2104	PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2105
2106	Py_INCREF(&BZ2Decomp_Type);
2107	PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2108}
2109