14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""Functions that read and write gzipped files.
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe user of the file doesn't have to worry about the compression,
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmbut random access is not allowed."""
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# based on Andrew Kuchling's minigzip.py distributed with the zlib module
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport struct, sys, time, os
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport zlib
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport io
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport __builtin__
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__all__ = ["GzipFile","open"]
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmFTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmREAD, WRITE = 1, 2
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef write32u(output, value):
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # The L format writes the bit pattern correctly whether signed
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # or unsigned.
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    output.write(struct.pack("<L", value))
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef read32(input):
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return struct.unpack("<I", input.read(4))[0]
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef open(filename, mode="rb", compresslevel=9):
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Shorthand for GzipFile(filename, mode, compresslevel).
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The filename argument is required; mode defaults to 'rb'
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    and compresslevel defaults to 9.
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return GzipFile(filename, mode, compresslevel)
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass GzipFile(io.BufferedIOBase):
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """The GzipFile class simulates most of the methods of a file object with
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    the exception of the readinto() and truncate() methods.
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    myfileobj = None
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    max_read_chunk = 10 * 1024 * 1024   # 10Mb
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, filename=None, mode=None,
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 compresslevel=9, fileobj=None, mtime=None):
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Constructor for the GzipFile class.
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        At least one of fileobj and filename must be given a
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        non-trivial value.
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The new class instance is based on fileobj, which can be a regular
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        file, a StringIO object, or any other object which simulates a file.
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        It defaults to None, in which case filename is opened to provide
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        a file object.
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        When fileobj is not None, the filename argument is only used to be
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        included in the gzip file header, which may includes the original
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        filename of the uncompressed file.  It defaults to the filename of
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fileobj, if discernible; otherwise, it defaults to the empty string,
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        and in this case the original filename is not included in the header.
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        depending on whether the file will be read or written.  The default
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Be aware that only the 'rb', 'ab', and 'wb' values should be used
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cross-platform portability.
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The compresslevel argument is an integer from 1 to 9 controlling the
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        level of compression; 1 is fastest and produces the least compression,
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        and 9 is slowest and produces the most compression.  The default is 9.
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The mtime argument is an optional numeric timestamp to be written
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        to the stream when compressing.  All gzip compressed streams
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        are required to contain a timestamp.  If omitted or None, the
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        current time is used.  This module ignores the timestamp when
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        decompressing; however, some programs, such as gunzip, make use
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        of it.  The format of the timestamp is the same as that of the
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return value of time.time() and of the st_mtime member of the
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        object returned by os.stat().
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # guarantee the file is opened in binary mode on platforms
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # that care about that sort of thing
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if mode and 'b' not in mode:
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            mode += 'b'
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if fileobj is None:
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if filename is None:
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if hasattr(fileobj, 'name'): filename = fileobj.name
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else: filename = ''
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if mode is None:
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if hasattr(fileobj, 'mode'): mode = fileobj.mode
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else: mode = 'rb'
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if mode[0:1] == 'r':
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.mode = READ
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Set flag indicating start of a new member
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._new_member = True
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Buffer data read from gzip file. extrastart is offset in
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # stream where buffer starts. extrasize is number of
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # bytes remaining in buffer from current stream position.
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.extrabuf = ""
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.extrasize = 0
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.extrastart = 0
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.name = filename
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Starts small, scales exponentially
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.min_readsize = 100
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif mode[0:1] == 'w' or mode[0:1] == 'a':
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.mode = WRITE
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._init_write(filename)
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.compress = zlib.compressobj(compresslevel,
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                             zlib.DEFLATED,
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                             -zlib.MAX_WBITS,
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                             zlib.DEF_MEM_LEVEL,
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                             0)
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError, "Mode " + mode + " not supported"
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj = fileobj
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.offset = 0
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.mtime = mtime
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode == WRITE:
1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._write_gzip_header()
1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    @property
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def filename(self):
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        import warnings
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        warnings.warn("use the name attribute", DeprecationWarning, 2)
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode == WRITE and self.name[-3:] != ".gz":
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return self.name + ".gz"
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.name
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __repr__(self):
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        s = repr(self.fileobj)
1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _check_closed(self):
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Raises a ValueError if the underlying file object has been closed.
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.closed:
1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise ValueError('I/O operation on closed file.')
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _init_write(self, filename):
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.name = filename
1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.crc = zlib.crc32("") & 0xffffffffL
1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.size = 0
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.writebuf = []
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.bufsize = 0
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _write_gzip_header(self):
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.write('\037\213')             # magic header
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.write('\010')                 # compression method
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fname = os.path.basename(self.name)
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if fname.endswith(".gz"):
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fname = fname[:-3]
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        flags = 0
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if fname:
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            flags = FNAME
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.write(chr(flags))
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        mtime = self.mtime
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if mtime is None:
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            mtime = time.time()
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        write32u(self.fileobj, long(mtime))
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.write('\002')
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.write('\377')
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if fname:
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.write(fname + '\000')
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _init_read(self):
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.crc = zlib.crc32("") & 0xffffffffL
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.size = 0
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _read_gzip_header(self):
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        magic = self.fileobj.read(2)
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if magic != '\037\213':
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError, 'Not a gzipped file'
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        method = ord( self.fileobj.read(1) )
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if method != 8:
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError, 'Unknown compression method'
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        flag = ord( self.fileobj.read(1) )
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.mtime = read32(self.fileobj)
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # extraflag = self.fileobj.read(1)
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # os = self.fileobj.read(1)
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.read(2)
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if flag & FEXTRA:
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Read & discard the extra field, if present
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            xlen = ord(self.fileobj.read(1))
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            xlen = xlen + 256*ord(self.fileobj.read(1))
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.read(xlen)
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if flag & FNAME:
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Read and discard a null-terminated string containing the filename
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            while True:
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s = self.fileobj.read(1)
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if not s or s=='\000':
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    break
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if flag & FCOMMENT:
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Read and discard a null-terminated string containing a comment
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            while True:
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s = self.fileobj.read(1)
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if not s or s=='\000':
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    break
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if flag & FHCRC:
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def write(self,data):
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._check_closed()
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode != WRITE:
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            import errno
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError(errno.EBADF, "write() on read-only GzipFile object")
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.fileobj is None:
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise ValueError, "write() on closed GzipFile object"
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Convert data type if called by io.BufferedWriter.
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if isinstance(data, memoryview):
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            data = data.tobytes()
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if len(data) > 0:
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.size = self.size + len(data)
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.write( self.compress.compress(data) )
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.offset += len(data)
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return len(data)
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def read(self, size=-1):
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._check_closed()
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode != READ:
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            import errno
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.extrasize <= 0 and self.fileobj is None:
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return ''
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        readsize = 1024
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if size < 0:        # get the whole thing
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                while True:
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self._read(readsize)
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    readsize = min(self.max_read_chunk, readsize * 2)
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except EOFError:
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                size = self.extrasize
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:               # just get some more of it
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                while size > self.extrasize:
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self._read(readsize)
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    readsize = min(self.max_read_chunk, readsize * 2)
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except EOFError:
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if size > self.extrasize:
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    size = self.extrasize
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        offset = self.offset - self.extrastart
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        chunk = self.extrabuf[offset: offset + size]
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrasize = self.extrasize - size
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.offset += size
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return chunk
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _unread(self, buf):
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrasize = len(buf) + self.extrasize
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.offset -= len(buf)
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _read(self, size=1024):
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.fileobj is None:
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise EOFError, "Reached EOF"
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self._new_member:
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # If the _new_member flag is set, we have to
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # jump to the next member, if there is one.
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # First, check if we're at the end of the file;
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # if so, it's time to stop; no more members to read.
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            pos = self.fileobj.tell()   # Save current position
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.seek(0, 2)     # Seek to end of file
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if pos == self.fileobj.tell():
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise EOFError, "Reached EOF"
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.fileobj.seek( pos ) # Return to original position
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._init_read()
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._read_gzip_header()
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._new_member = False
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Read a chunk of data from the file
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        buf = self.fileobj.read(size)
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # If the EOF has been reached, flush the decompression object
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # and mark this object as finished.
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if buf == "":
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            uncompress = self.decompress.flush()
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._read_eof()
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._add_read_data( uncompress )
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise EOFError, 'Reached EOF'
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        uncompress = self.decompress.decompress(buf)
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._add_read_data( uncompress )
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.decompress.unused_data != "":
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Ending case: we've come to the end of a member in the file,
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # so seek back to the start of the unused data, finish up
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # this member, and read a new gzip header.
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # (The number of bytes to seek back is the length of the unused
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Check the CRC and file size, and set the flag so we read
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # a new member on the next call
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._read_eof()
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._new_member = True
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _add_read_data(self, data):
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        offset = self.offset - self.extrastart
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrabuf = self.extrabuf[offset:] + data
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrasize = self.extrasize + len(data)
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrastart = self.offset
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.size = self.size + len(data)
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _read_eof(self):
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # We've read to the end of the file, so we have to rewind in order
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # to reread the 8 bytes containing the CRC and the file size.
3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # We check the that the computed CRC and size of the
3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # uncompressed data matches the stored values.  Note that the size
3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # stored is the true file size mod 2**32.
3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.seek(-8, 1)
3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        crc32 = read32(self.fileobj)
3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        isize = read32(self.fileobj)  # may exceed 2GB
3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if crc32 != self.crc:
3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError("CRC check failed %s != %s" % (hex(crc32),
3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                                         hex(self.crc)))
3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif isize != (self.size & 0xffffffffL):
3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError, "Incorrect length of data produced"
3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Gzip files can be padded with zeroes and still have archives.
3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Consume all zero bytes and set the file position to the first
3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # non-zero byte. See http://www.gzip.org/#faq8
3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        c = "\x00"
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while c == "\x00":
3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            c = self.fileobj.read(1)
3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if c:
3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.seek(-1, 1)
3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    @property
3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def closed(self):
3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.fileobj is None
3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def close(self):
3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.fileobj is None:
3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return
3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode == WRITE:
3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.write(self.compress.flush())
3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            write32u(self.fileobj, self.crc)
3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # self.size may exceed 2GB, or even 4GB
3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            write32u(self.fileobj, self.size & 0xffffffffL)
3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj = None
3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif self.mode == READ:
3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj = None
3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.myfileobj:
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.myfileobj.close()
3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.myfileobj = None
3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._check_closed()
3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode == WRITE:
3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Ensure the compressor's buffer is flushed
3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.write(self.compress.flush(zlib_mode))
3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.fileobj.flush()
3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def fileno(self):
3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Invoke the underlying file object's fileno() method.
3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        This will raise AttributeError if the underlying file object
3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        doesn't support fileno().
3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.fileobj.fileno()
3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def rewind(self):
3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        '''Return the uncompressed stream file position indicator to the
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        beginning of the file'''
3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode != READ:
3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise IOError("Can't rewind in write mode")
3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.fileobj.seek(0)
3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._new_member = True
3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrabuf = ""
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrasize = 0
3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.extrastart = 0
3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.offset = 0
3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def readable(self):
3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.mode == READ
3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def writable(self):
4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.mode == WRITE
4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def seekable(self):
4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def seek(self, offset, whence=0):
4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if whence:
4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if whence == 1:
4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                offset = self.offset + offset
4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise ValueError('Seek from end not supported')
4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.mode == WRITE:
4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if offset < self.offset:
4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise IOError('Negative seek in write mode')
4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            count = offset - self.offset
4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for i in range(count // 1024):
4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.write(1024 * '\0')
4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.write((count % 1024) * '\0')
4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif self.mode == READ:
4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if offset < self.offset:
4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # for negative seek, rewind and do positive seek
4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.rewind()
4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            count = offset - self.offset
4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for i in range(count // 1024):
4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.read(1024)
4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.read(count % 1024)
4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.offset
4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def readline(self, size=-1):
4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if size < 0:
4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Shortcut common case - newline found in buffer.
4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            offset = self.offset - self.extrastart
4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            i = self.extrabuf.find('\n', offset) + 1
4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if i > 0:
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.extrasize -= i - offset
4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.offset += i - offset
4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return self.extrabuf[offset: i]
4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            size = sys.maxint
4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            readsize = self.min_readsize
4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            readsize = size
4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        bufs = []
4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while size != 0:
4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            c = self.read(readsize)
4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            i = c.find('\n')
4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # We set i=size to break out of the loop under two
4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # conditions: 1) there's no newline, and the chunk is
4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # larger than size, or 2) there is a newline, but the
4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # resulting line would be longer than 'size'.
4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (size <= i) or (i == -1 and len(c) > size):
4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                i = size - 1
4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if i >= 0 or c == '':
4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                bufs.append(c[:i + 1])    # Add portion of last chunk
4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._unread(c[i + 1:])   # Push back rest of chunk
4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                break
4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Append chunk to list, decrease 'size',
4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            bufs.append(c)
4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            size = size - len(c)
4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            readsize = min(size, readsize * 2)
4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if readsize > self.min_readsize:
4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return ''.join(bufs) # Return resulting line
4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _test():
4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Act like gzip; with -d, act like gunzip.
4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # The input file is not deleted, however, nor are any other gzip
4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # options or features supported.
4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    args = sys.argv[1:]
4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    decompress = args and args[0] == "-d"
4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if decompress:
4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        args = args[1:]
4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not args:
4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        args = ["-"]
4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for arg in args:
4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if decompress:
4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if arg == "-":
4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                g = sys.stdout
4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if arg[-3:] != ".gz":
4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    print "filename doesn't end in .gz:", repr(arg)
4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    continue
4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                f = open(arg, "rb")
4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                g = __builtin__.open(arg[:-3], "wb")
4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if arg == "-":
4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                f = sys.stdin
4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                f = __builtin__.open(arg, "rb")
4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                g = open(arg + ".gz", "wb")
4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while True:
4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            chunk = f.read(1024)
5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not chunk:
5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                break
5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            g.write(chunk)
5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if g is not sys.stdout:
5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            g.close()
5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if f is not sys.stdin:
5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            f.close()
5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmif __name__ == '__main__':
5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    _test()
510