"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import io
import os
import struct
import sys
import time
import zlib

try:
    import __builtin__
except ImportError:
    # Python 3 renamed the module.  Keep the historical name bound so that
    # existing references to __builtin__.open() keep working.
    import builtins as __builtin__

__all__ = ["GzipFile", "open"]

# Bit flags of the FLG byte in a gzip member header.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode.
READ, WRITE = 1, 2


def write32u(output, value):
    """Write *value* to *output* as a little-endian 32-bit integer."""
    # The L format writes the bit pattern correctly whether signed
    # or unsigned.
    output.write(struct.pack("<L", value))


def read32(input):
    """Read four bytes from *input*; return them as an unsigned LE integer."""
    return struct.unpack("<I", input.read(4))[0]


def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    return GzipFile(filename, mode, compresslevel)


class GzipFile(io.BufferedIOBase):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened (and therefore owned) by this instance, if any.
    myfileobj = None
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        Raises IOError if the mode is not a read, write or append mode.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Buffer data read from gzip file. extrastart is offset in
            # stream where buffer starts. extrasize is number of
            # bytes remaining in buffer from current stream position.
            self.extrabuf = b""
            self.extrasize = 0
            self.extrastart = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate stream, no zlib header/trailer.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        self.offset = 0
        self.mtime = mtime

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated; use the name attribute instead.

        In write mode the returned name gets a '.gz' suffix if it does not
        already end with one.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _check_closed(self):
        """Raises a ValueError if the underlying file object has been closed.

        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

    def _init_write(self, filename):
        """Reset the name, CRC, size and write buffers for a new stream."""
        self.name = filename
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1.  Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(struct.pack("<B", flags))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        self.fileobj.write(b'\002')                 # extra flags byte
        self.fileobj.write(b'\377')                 # OS byte
        if fname:
            self.fileobj.write(fname + b'\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a member."""
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Parse one member header, leaving fileobj at the compressed data.

        Stores the header timestamp in self.mtime and discards the optional
        fields.  Raises IOError on a bad magic number or an unknown
        compression method.
        """
        magic = self.fileobj.read(2)
        if magic != b'\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        self.mtime = read32(self.fileobj)
        # extraflag = self.fileobj.read(1)
        # os = self.fileobj.read(1)
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256 * ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress *data* and write it out; return the number of bytes taken.

        Raises ValueError if the file is closed and IOError (EBADF) if it
        was opened for reading.
        """
        self._check_closed()
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        # Convert data type if called by io.BufferedWriter.
        if isinstance(data, memoryview):
            data = data.tobytes()

        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

        return len(data)

    def read(self, size=-1):
        """Return up to *size* uncompressed bytes.

        A negative (or None) size reads everything up to end of file.
        Raises ValueError if the file is closed and IOError (EBADF) if it
        was opened for writing.
        """
        self._check_closed()
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return b''

        readsize = 1024
        if size is None or size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        offset = self.offset - self.extrastart
        chunk = self.extrabuf[offset: offset + size]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push back len(buf) bytes by rewinding the stream position.

        Only the counters are adjusted; the contents of *buf* are not stored.
        """
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read and decompress one chunk of up to *size* compressed bytes.

        The decompressed data is appended to the read buffer.  Raises
        EOFError once there is nothing more to read.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == b"":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != b"":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data) + 8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Fold *data* into the CRC/size and append it to the read buffer."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        # Drop the part of the buffer that has already been consumed.
        offset = self.offset - self.extrastart
        self.extrabuf = self.extrabuf[offset:] + data
        self.extrasize = self.extrasize + len(data)
        self.extrastart = self.offset
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the member trailer and skip any zero padding after it.

        Raises IOError if the stored CRC or length does not match the data
        that was decompressed.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

        # Gzip files can be padded with zeroes and still have archives.
        # Consume all zero bytes and set the file position to the first
        # non-zero byte. See http://www.gzip.org/#faq8
        c = b"\x00"
        while c == b"\x00":
            c = self.fileobj.read(1)
        if c:
            self.fileobj.seek(-1, 1)

    @property
    def closed(self):
        """True once close() has dropped the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the compressor is flushed and the CRC/size trailer is
        written.  A file opened by the constructor is closed as well; a
        caller-supplied fileobj is left open.  Calling close() again is a
        no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Detach first so the object counts as closed, and the file we own
        # is still closed, even if writing the trailer fails.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data to the underlying file object.

        Has an effect in write mode only.  Raises ValueError if closed.
        """
        self._check_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = b""
        self.extrasize = 0
        self.extrastart = 0
        self.offset = 0

    def readable(self):
        return self.mode == READ

    def writable(self):
        return self.mode == WRITE

    def seekable(self):
        return True

    def seek(self, offset, whence=0):
        """Move to uncompressed position *offset*; return the new position.

        whence may be 0 (absolute) or 1 (relative); seeking from the end
        raises ValueError.  In write mode only forward seeks are allowed and
        the gap is filled with zero bytes.  In read mode a backward seek
        rewinds and re-reads from the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            zeros = 1024 * b'\0'
            for i in range(count // 1024):
                self.write(zeros)
            self.write((count % 1024) * b'\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        return self.offset

    def readline(self, size=-1):
        """Return one line, including its newline, of at most *size* bytes.

        A negative (or None) size means no limit.  An empty result means
        end of file.
        """
        if size is None or size < 0:
            # Shortcut common case - newline found in buffer.
            offset = self.offset - self.extrastart
            i = self.extrabuf.find(b'\n', offset) + 1
            if i > 0:
                self.extrasize -= i - offset
                self.offset += i - offset
                return self.extrabuf[offset: i]

            # sys.maxint does not exist on Python 3.
            size = getattr(sys, 'maxint', sys.maxsize)
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find(b'\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == b'':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return b''.join(bufs)   # Return resulting line


def _test():
    """Act like gzip; with -d, act like gunzip.

    The input file is not deleted, however, nor are any other gzip
    options or features supported.
    """
4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm args = sys.argv[1:] 4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm decompress = args and args[0] == "-d" 4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if decompress: 4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm args = args[1:] 4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not args: 4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm args = ["-"] 4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for arg in args: 4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if decompress: 4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if arg == "-": 4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = GzipFile(filename="", mode="rb", fileobj=sys.stdin) 4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g = sys.stdout 4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if arg[-3:] != ".gz": 4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print "filename doesn't end in .gz:", repr(arg) 4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue 4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = open(arg, "rb") 4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g = __builtin__.open(arg[:-3], "wb") 4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if arg == "-": 4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = sys.stdin 4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g = GzipFile(filename="", mode="wb", fileobj=sys.stdout) 4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = __builtin__.open(arg, "rb") 4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g = open(arg + ".gz", "wb") 4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while True: 4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm chunk = f.read(1024) 5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not chunk: 5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break 
5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g.write(chunk) 5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if g is not sys.stdout: 5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g.close() 5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if f is not sys.stdin: 5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f.close() 5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmif __name__ == '__main__': 5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _test() 510