1"""
2Read and write ZIP files.
3"""
4import struct, os, time, sys, shutil
5import binascii, cStringIO, stat
6import io
7import re
8import string
9
10try:
11    import zlib # We may need its compression method
12    crc32 = zlib.crc32
13except ImportError:
14    zlib = None
15    crc32 = binascii.crc32
16
17__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
18           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
19
class BadZipfile(Exception):
    """Raised when a file cannot be processed as a supported ZIP archive."""
22
23
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that would require ZIP64 extensions
    while those extensions are disabled.
    """
29
error = BadZipfile      # The exception raised by this module (alias)

# Sizes above this value (2**31 - 1) are treated as needing the ZIP64
# extension; see ZipInfo.FileHeader.
ZIP64_LIMIT = (1 << 31) - 1
# 2**16.  NOTE(review): presumably the entry-count threshold for ZIP64; the
# code that consumes it is not in this part of the file -- confirm there.
ZIP_FILECOUNT_LIMIT = 1 << 16
# 2**16 - 1.  NOTE(review): presumably the longest archive comment allowed
# (the comment-size field of structEndArchive is a 16-bit value) -- confirm.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
40
41# Below are some formats and associated data for reading/writing headers using
42# the struct module.  The names and structures of headers/records are those used
43# in the PKWARE description of the ZIP file format:
44#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
45# (URL valid as of January 2008)
46
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout: little-endian, 4-byte signature, four 16-bit fields, two 32-bit
# fields and one 16-bit field (22 bytes in total).
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the list built from an unpacked structEndArchive record.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
65
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout: little-endian, 4-byte signature followed by 18 integer fields
# (46 bytes in total); variable-length name/extra/comment data follow it.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
92
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout: little-endian, 4-byte signature followed by 11 integer fields
# (30 bytes in total); ZipInfo.FileHeader appends the file name and the
# extra field after it.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices into a tuple unpacked with structFileHeader.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
111
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout: little-endian, 4-byte signature, 32-bit, 64-bit and 32-bit fields
# (20 bytes in total).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout: little-endian, 4-byte signature, one 64-bit, two 16-bit, two 32-bit
# and four 64-bit fields (56 bytes in total).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices into a tuple unpacked with structEndArchive64.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
133
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in fp.

    fp is a seekable file object opened for binary reading.  An IOError
    raised while probing the file counts as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # _EndRecData gives back None when no end record could be located.
    return bool(endrec)
141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    filename may be a path, or an object with a read() method, in which
    case it is probed directly and left open.  Any IOError (for example a
    path that cannot be opened) yields False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it is closed again.
            with open(filename, "rb") as fp:
                is_zip = _check_zipfile(fp)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
157
def _EndRecData64(fpin, offset, endrec):
    """
    Merge the ZIP64 end-of-archive data, if present, into endrec.

    fpin   -- seekable file object for the archive.
    offset -- position of the classic end-of-central-directory record,
              relative to the end of the file (a negative number).
    endrec -- list built by _EndRecData; updated in place and returned.

    endrec is returned unchanged when no ZIP64 locator/record is found.
    Raises BadZipfile for archives spanning several disks.
    """
    # The locator sits immediately before the classic end record.
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the ZIP64 record then ends exactly
    # where the locator starts.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    rec64 = struct.unpack(structEndArchive64, record)
    if rec64[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = rec64[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = rec64[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = rec64[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = rec64[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = rec64[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = rec64[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = rec64[_CD64_OFFSET_START_CENTDIR]
    return endrec
199
200
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the fields of the unpacked end record, followed
    by the archive comment and by the file offset at which the record was
    found.  ZIP64 data, when present, is merged in by _EndRecData64.
    None is returned when no end record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First try the cheap case: no archive comment, so the end record is the
    # very last thing in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    is_plain_record = (len(tail) == sizeEndCentDir and
                       tail[0:4] == stringEndArchive and
                       tail[-2:] == b"\000\000")
    if is_plain_record:
        # Signature matches and the comment length is zero: unpack, then add
        # a blank comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # Found the magic number; attempt to unpack and interpret.
    record_end = start + sizeEndCentDir
    recData = window[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
260
261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description filled with default header values.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 -- True to always add a ZIP64 extra record, False to refuse
                 ZIP64, None to decide from the sizes.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT and zip64 is
        false.  May raise extract_version/create_version to 45 as a side
        effect when ZIP64 sizes are written.
        """
        dt = self.date_time
        # Pack the timestamp into the DOS date/time bit fields
        # (seconds are stored with two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra record type 1: header (type, length) plus both sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is written as ASCII when possible; otherwise it is
        encoded as UTF-8 and flag bit 0x800 is set.  Byte-string names are
        passed through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values from the extra field to this entry.

        Walks the (type, length, data) records in self.extra.  For a type 1
        (ZIP64) record, file_size, compress_size and header_offset are
        replaced, in that order, whenever they currently hold the
        "see ZIP64 record" marker value.

        Raises RuntimeError when a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record starts with a 4-byte (type, length) header.  Stop as
        # soon as fewer than 4 bytes remain so that trailing padding or a
        # truncated header does not make struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip to the next record (header plus payload).
            extra = extra[ln+4:]
418
419
class _ZipDecrypter:
    """Decrypt members protected with the password-based ZIP encryption.

    The scheme is known to be vulnerable to known-plaintext attacks, but
    support for it is still needed to read such archives.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The key schedule applies the CRC-32 step one byte at a time; the
        original authors found a table-driven step faster than calling
        binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        slot = (crc ^ ord(ch)) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[slot]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for ch in pwd:
            self._UpdateKeys(ch)

    def _UpdateKeys(self, c):
        """Advance the key state with the plaintext character c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 255)) & mask32
        self.key1 = (key1 * 134775813 + 1) & mask32
        top_byte = chr((self.key1 >> 24) & 255)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ keystream)
        # The key state is advanced with the decrypted character.
        self._UpdateKeys(plain)
        return plain
478
479
# Names of ZIP compression methods, keyed by method id.  ZipExtFile uses
# this table only to build the NotImplementedError message for a method it
# cannot read.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
499
500
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  Parenthesised on purpose:
    # "1 << 31 - 1" would parse as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Set up reading of one member.

        fileobj       -- file object positioned at the member's data.
        mode          -- mode string; a 'U' in it enables universal newlines.
        zipinfo       -- ZipInfo describing the member.
        decrypter     -- optional callable applied to each raw character.
        close_fileobj -- if true, close() also closes fileobj.

        Raises NotImplementedError for compression types other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            # Negative window size: raw deflate stream without zlib header.
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() needed several read1() calls, each of which drops
                # the consumed part of the buffer; put the chunk back in
                # front of what is left instead of rewinding past the start.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object always supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it when eof is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close the stream, and the underlying file if we own it."""
        try :
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
703
704
705class ZipFile(object):
706    """ Class with methods to open, read, write, close, list zip files.
707
708    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
709
710    file: Either the path to the file, or a file-like object.
711          If it is a path, the file will be opened and closed by ZipFile.
712    mode: The mode can be either read "r", write "w" or append "a".
713    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
714    allowZip64: if True ZipFile will create files with ZIP64 extensions when
715                needed, otherwise it will raise an exception when this would
716                be necessary.
717
718    """
719
720    fp = None                   # Set here since __del__ checks it
721
722    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
723        """Open the ZIP file with mode read "r", write "w" or append "a"."""
724        if mode not in ("r", "w", "a"):
725            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
726
727        if compression == ZIP_STORED:
728            pass
729        elif compression == ZIP_DEFLATED:
730            if not zlib:
731                raise RuntimeError,\
732                      "Compression requires the (missing) zlib module"
733        else:
734            raise RuntimeError, "That compression method is not supported"
735
736        self._allowZip64 = allowZip64
737        self._didModify = False
738        self.debug = 0  # Level of printing: 0 through 3
739        self.NameToInfo = {}    # Find file info given name
740        self.filelist = []      # List of ZipInfo instances for archive
741        self.compression = compression  # Method of compression
742        self.mode = key = mode.replace('b', '')[0]
743        self.pwd = None
744        self._comment = ''
745
746        # Check if we were passed a file-like object
747        if isinstance(file, basestring):
748            self._filePassed = 0
749            self.filename = file
750            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
751            try:
752                self.fp = open(file, modeDict[mode])
753            except IOError:
754                if mode == 'a':
755                    mode = key = 'w'
756                    self.fp = open(file, modeDict[mode])
757                else:
758                    raise
759        else:
760            self._filePassed = 1
761            self.fp = file
762            self.filename = getattr(file, 'name', None)
763
764        try:
765            if key == 'r':
766                self._RealGetContents()
767            elif key == 'w':
768                # set the modified flag so central directory gets written
769                # even if no files are added to the archive
770                self._didModify = True
771            elif key == 'a':
772                try:
773                    # See if file is a zip file
774                    self._RealGetContents()
775                    # seek to start of directory and overwrite
776                    self.fp.seek(self.start_dir, 0)
777                except BadZipfile:
778                    # file is not a zip file, just append
779                    self.fp.seek(0, 2)
780
781                    # set the modified flag so central directory gets written
782                    # even if no files are added to the archive
783                    self._didModify = True
784            else:
785                raise RuntimeError('Mode must be "r", "w" or "a"')
786        except:
787            fp = self.fp
788            self.fp = None
789            if not self._filePassed:
790                fp.close()
791            raise
792
    def __enter__(self):
        """Enter a with-block; the ZipFile itself is the context value."""
        return self
795
    def __exit__(self, type, value, traceback):
        """Leave a with-block by calling close().

        Returns None, so an exception raised inside the block is not
        suppressed.
        """
        self.close()
798
799    def _RealGetContents(self):
800        """Read in the table of contents for the ZIP file."""
801        fp = self.fp
802        try:
803            endrec = _EndRecData(fp)
804        except IOError:
805            raise BadZipfile("File is not a zip file")
806        if not endrec:
807            raise BadZipfile, "File is not a zip file"
808        if self.debug > 1:
809            print endrec
810        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
811        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
812        self._comment = endrec[_ECD_COMMENT]    # archive comment
813
814        # "concat" is zero, unless zip was concatenated to another file
815        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
816        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
817            # If Zip64 extension structures are present, account for them
818            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
819
820        if self.debug > 2:
821            inferred = concat + offset_cd
822            print "given, inferred, offset", offset_cd, inferred, concat
823        # self.start_dir:  Position of start of central directory
824        self.start_dir = offset_cd + concat
825        fp.seek(self.start_dir, 0)
826        data = fp.read(size_cd)
827        fp = cStringIO.StringIO(data)
828        total = 0
829        while total < size_cd:
830            centdir = fp.read(sizeCentralDir)
831            if len(centdir) != sizeCentralDir:
832                raise BadZipfile("Truncated central directory")
833            centdir = struct.unpack(structCentralDir, centdir)
834            if centdir[_CD_SIGNATURE] != stringCentralDir:
835                raise BadZipfile("Bad magic number for central directory")
836            if self.debug > 2:
837                print centdir
838            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
839            # Create ZipInfo instance to store file information
840            x = ZipInfo(filename)
841            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
842            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
843            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
844            (x.create_version, x.create_system, x.extract_version, x.reserved,
845                x.flag_bits, x.compress_type, t, d,
846                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
847            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
848            # Convert date/time code to (year, month, day, hour, min, sec)
849            x._raw_time = t
850            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
851                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
852
853            x._decodeExtra()
854            x.header_offset = x.header_offset + concat
855            x.filename = x._decodeFilename()
856            self.filelist.append(x)
857            self.NameToInfo[x.filename] = x
858
859            # update total bytes read from central directory
860            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
861                     + centdir[_CD_EXTRA_FIELD_LENGTH]
862                     + centdir[_CD_COMMENT_LENGTH])
863
864            if self.debug > 2:
865                print "total", total
866
867
868    def namelist(self):
869        """Return a list of file names in the archive."""
870        l = []
871        for data in self.filelist:
872            l.append(data.filename)
873        return l
874
875    def infolist(self):
876        """Return a list of class ZipInfo instances for files in the
877        archive."""
878        return self.filelist
879
880    def printdir(self):
881        """Print a table of contents for the zip file."""
882        print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
883        for zinfo in self.filelist:
884            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
885            print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
886
887    def testzip(self):
888        """Read all the files and check the CRC."""
889        chunk_size = 2 ** 20
890        for zinfo in self.filelist:
891            try:
892                # Read by chunks, to avoid an OverflowError or a
893                # MemoryError with very large embedded files.
894                with self.open(zinfo.filename, "r") as f:
895                    while f.read(chunk_size):     # Check CRC-32
896                        pass
897            except BadZipfile:
898                return zinfo.filename
899
900    def getinfo(self, name):
901        """Return the instance of ZipInfo given 'name'."""
902        info = self.NameToInfo.get(name)
903        if info is None:
904            raise KeyError(
905                'There is no item named %r in the archive' % name)
906
907        return info
908
909    def setpassword(self, pwd):
910        """Set default password for encrypted files."""
911        self.pwd = pwd
912
913    @property
914    def comment(self):
915        """The comment text associated with the ZIP file."""
916        return self._comment
917
918    @comment.setter
919    def comment(self, comment):
920        # check for valid comment length
921        if len(comment) >= ZIP_MAX_COMMENT:
922            if self.debug:
923                print('Archive comment is too long; truncating to %d bytes'
924                        % ZIP_MAX_COMMENT)
925            comment = comment[:ZIP_MAX_COMMENT]
926        self._comment = comment
927        self._didModify = True
928
929    def read(self, name, pwd=None):
930        """Return file bytes (as a string) for name."""
931        return self.open(name, "r", pwd).read()
932
933    def open(self, name, mode="r", pwd=None):
934        """Return file-like object for 'name'."""
935        if mode not in ("r", "U", "rU"):
936            raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
937        if not self.fp:
938            raise RuntimeError, \
939                  "Attempt to read ZIP archive that was already closed"
940
941        # Only open a new file for instances where we were not
942        # given a file object in the constructor
943        if self._filePassed:
944            zef_file = self.fp
945            should_close = False
946        else:
947            zef_file = open(self.filename, 'rb')
948            should_close = True
949
950        try:
951            # Make sure we have an info object
952            if isinstance(name, ZipInfo):
953                # 'name' is already an info object
954                zinfo = name
955            else:
956                # Get info object for name
957                zinfo = self.getinfo(name)
958
959            zef_file.seek(zinfo.header_offset, 0)
960
961            # Skip the file header:
962            fheader = zef_file.read(sizeFileHeader)
963            if len(fheader) != sizeFileHeader:
964                raise BadZipfile("Truncated file header")
965            fheader = struct.unpack(structFileHeader, fheader)
966            if fheader[_FH_SIGNATURE] != stringFileHeader:
967                raise BadZipfile("Bad magic number for file header")
968
969            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
970            if fheader[_FH_EXTRA_FIELD_LENGTH]:
971                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
972
973            if fname != zinfo.orig_filename:
974                raise BadZipfile, \
975                        'File name in directory "%s" and header "%s" differ.' % (
976                            zinfo.orig_filename, fname)
977
978            # check for encrypted flag & handle password
979            is_encrypted = zinfo.flag_bits & 0x1
980            zd = None
981            if is_encrypted:
982                if not pwd:
983                    pwd = self.pwd
984                if not pwd:
985                    raise RuntimeError, "File %s is encrypted, " \
986                        "password required for extraction" % name
987
988                zd = _ZipDecrypter(pwd)
989                # The first 12 bytes in the cypher stream is an encryption header
990                #  used to strengthen the algorithm. The first 11 bytes are
991                #  completely random, while the 12th contains the MSB of the CRC,
992                #  or the MSB of the file time depending on the header type
993                #  and is used to check the correctness of the password.
994                bytes = zef_file.read(12)
995                h = map(zd, bytes[0:12])
996                if zinfo.flag_bits & 0x8:
997                    # compare against the file type from extended local headers
998                    check_byte = (zinfo._raw_time >> 8) & 0xff
999                else:
1000                    # compare against the CRC otherwise
1001                    check_byte = (zinfo.CRC >> 24) & 0xff
1002                if ord(h[11]) != check_byte:
1003                    raise RuntimeError("Bad password for file", name)
1004
1005            return ZipExtFile(zef_file, mode, zinfo, zd,
1006                    close_fileobj=should_close)
1007        except:
1008            if should_close:
1009                zef_file.close()
1010            raise
1011
1012    def extract(self, member, path=None, pwd=None):
1013        """Extract a member from the archive to the current working directory,
1014           using its full name. Its file information is extracted as accurately
1015           as possible. `member' may be a filename or a ZipInfo object. You can
1016           specify a different directory using `path'.
1017        """
1018        if not isinstance(member, ZipInfo):
1019            member = self.getinfo(member)
1020
1021        if path is None:
1022            path = os.getcwd()
1023
1024        return self._extract_member(member, path, pwd)
1025
1026    def extractall(self, path=None, members=None, pwd=None):
1027        """Extract all members from the archive to the current working
1028           directory. `path' specifies a different directory to extract to.
1029           `members' is optional and must be a subset of the list returned
1030           by namelist().
1031        """
1032        if members is None:
1033            members = self.namelist()
1034
1035        for zipinfo in members:
1036            self.extract(zipinfo, path, pwd)
1037
1038    def _extract_member(self, member, targetpath, pwd):
1039        """Extract the ZipInfo object 'member' to a physical
1040           file on the path targetpath.
1041        """
1042        # build the destination pathname, replacing
1043        # forward slashes to platform specific separators.
1044        arcname = member.filename.replace('/', os.path.sep)
1045
1046        if os.path.altsep:
1047            arcname = arcname.replace(os.path.altsep, os.path.sep)
1048        # interpret absolute pathname as relative, remove drive letter or
1049        # UNC path, redundant separators, "." and ".." components.
1050        arcname = os.path.splitdrive(arcname)[1]
1051        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1052                    if x not in ('', os.path.curdir, os.path.pardir))
1053        if os.path.sep == '\\':
1054            # filter illegal characters on Windows
1055            illegal = ':<>|"?*'
1056            if isinstance(arcname, unicode):
1057                table = {ord(c): ord('_') for c in illegal}
1058            else:
1059                table = string.maketrans(illegal, '_' * len(illegal))
1060            arcname = arcname.translate(table)
1061            # remove trailing dots
1062            arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1063            arcname = os.path.sep.join(x for x in arcname if x)
1064
1065        targetpath = os.path.join(targetpath, arcname)
1066        targetpath = os.path.normpath(targetpath)
1067
1068        # Create all upper directories if necessary.
1069        upperdirs = os.path.dirname(targetpath)
1070        if upperdirs and not os.path.exists(upperdirs):
1071            os.makedirs(upperdirs)
1072
1073        if member.filename[-1] == '/':
1074            if not os.path.isdir(targetpath):
1075                os.mkdir(targetpath)
1076            return targetpath
1077
1078        with self.open(member, pwd=pwd) as source, \
1079             file(targetpath, "wb") as target:
1080            shutil.copyfileobj(source, target)
1081
1082        return targetpath
1083
1084    def _writecheck(self, zinfo):
1085        """Check for errors before writing a file to the archive."""
1086        if zinfo.filename in self.NameToInfo:
1087            if self.debug:      # Warning for duplicate names
1088                print "Duplicate name:", zinfo.filename
1089        if self.mode not in ("w", "a"):
1090            raise RuntimeError, 'write() requires mode "w" or "a"'
1091        if not self.fp:
1092            raise RuntimeError, \
1093                  "Attempt to write ZIP archive that was already closed"
1094        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1095            raise RuntimeError, \
1096                  "Compression requires the (missing) zlib module"
1097        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1098            raise RuntimeError, \
1099                  "That compression method is not supported"
1100        if zinfo.file_size > ZIP64_LIMIT:
1101            if not self._allowZip64:
1102                raise LargeZipFile("Filesize would require ZIP64 extensions")
1103        if zinfo.header_offset > ZIP64_LIMIT:
1104            if not self._allowZip64:
1105                raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1106
1107    def write(self, filename, arcname=None, compress_type=None):
1108        """Put the bytes from filename into the archive under the name
1109        arcname."""
1110        if not self.fp:
1111            raise RuntimeError(
1112                  "Attempt to write to ZIP archive that was already closed")
1113
1114        st = os.stat(filename)
1115        isdir = stat.S_ISDIR(st.st_mode)
1116        mtime = time.localtime(st.st_mtime)
1117        date_time = mtime[0:6]
1118        # Create ZipInfo instance to store file information
1119        if arcname is None:
1120            arcname = filename
1121        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1122        while arcname[0] in (os.sep, os.altsep):
1123            arcname = arcname[1:]
1124        if isdir:
1125            arcname += '/'
1126        zinfo = ZipInfo(arcname, date_time)
1127        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1128        if compress_type is None:
1129            zinfo.compress_type = self.compression
1130        else:
1131            zinfo.compress_type = compress_type
1132
1133        zinfo.file_size = st.st_size
1134        zinfo.flag_bits = 0x00
1135        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1136
1137        self._writecheck(zinfo)
1138        self._didModify = True
1139
1140        if isdir:
1141            zinfo.file_size = 0
1142            zinfo.compress_size = 0
1143            zinfo.CRC = 0
1144            self.filelist.append(zinfo)
1145            self.NameToInfo[zinfo.filename] = zinfo
1146            self.fp.write(zinfo.FileHeader(False))
1147            return
1148
1149        with open(filename, "rb") as fp:
1150            # Must overwrite CRC and sizes with correct data later
1151            zinfo.CRC = CRC = 0
1152            zinfo.compress_size = compress_size = 0
1153            # Compressed size can be larger than uncompressed size
1154            zip64 = self._allowZip64 and \
1155                    zinfo.file_size * 1.05 > ZIP64_LIMIT
1156            self.fp.write(zinfo.FileHeader(zip64))
1157            if zinfo.compress_type == ZIP_DEFLATED:
1158                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1159                     zlib.DEFLATED, -15)
1160            else:
1161                cmpr = None
1162            file_size = 0
1163            while 1:
1164                buf = fp.read(1024 * 8)
1165                if not buf:
1166                    break
1167                file_size = file_size + len(buf)
1168                CRC = crc32(buf, CRC) & 0xffffffff
1169                if cmpr:
1170                    buf = cmpr.compress(buf)
1171                    compress_size = compress_size + len(buf)
1172                self.fp.write(buf)
1173        if cmpr:
1174            buf = cmpr.flush()
1175            compress_size = compress_size + len(buf)
1176            self.fp.write(buf)
1177            zinfo.compress_size = compress_size
1178        else:
1179            zinfo.compress_size = file_size
1180        zinfo.CRC = CRC
1181        zinfo.file_size = file_size
1182        if not zip64 and self._allowZip64:
1183            if file_size > ZIP64_LIMIT:
1184                raise RuntimeError('File size has increased during compressing')
1185            if compress_size > ZIP64_LIMIT:
1186                raise RuntimeError('Compressed size larger than uncompressed size')
1187        # Seek backwards and write file header (which will now include
1188        # correct CRC and file sizes)
1189        position = self.fp.tell()       # Preserve current position in file
1190        self.fp.seek(zinfo.header_offset, 0)
1191        self.fp.write(zinfo.FileHeader(zip64))
1192        self.fp.seek(position, 0)
1193        self.filelist.append(zinfo)
1194        self.NameToInfo[zinfo.filename] = zinfo
1195
1196    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
1197        """Write a file into the archive.  The contents is the string
1198        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1199        the name of the file in the archive."""
1200        if not isinstance(zinfo_or_arcname, ZipInfo):
1201            zinfo = ZipInfo(filename=zinfo_or_arcname,
1202                            date_time=time.localtime(time.time())[:6])
1203
1204            zinfo.compress_type = self.compression
1205            zinfo.external_attr = 0600 << 16
1206        else:
1207            zinfo = zinfo_or_arcname
1208
1209        if not self.fp:
1210            raise RuntimeError(
1211                  "Attempt to write to ZIP archive that was already closed")
1212
1213        if compress_type is not None:
1214            zinfo.compress_type = compress_type
1215
1216        zinfo.file_size = len(bytes)            # Uncompressed size
1217        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1218        self._writecheck(zinfo)
1219        self._didModify = True
1220        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1221        if zinfo.compress_type == ZIP_DEFLATED:
1222            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1223                 zlib.DEFLATED, -15)
1224            bytes = co.compress(bytes) + co.flush()
1225            zinfo.compress_size = len(bytes)    # Compressed size
1226        else:
1227            zinfo.compress_size = zinfo.file_size
1228        zip64 = zinfo.file_size > ZIP64_LIMIT or \
1229                zinfo.compress_size > ZIP64_LIMIT
1230        if zip64 and not self._allowZip64:
1231            raise LargeZipFile("Filesize would require ZIP64 extensions")
1232        self.fp.write(zinfo.FileHeader(zip64))
1233        self.fp.write(bytes)
1234        if zinfo.flag_bits & 0x08:
1235            # Write CRC and file sizes after the file data
1236            fmt = '<LQQ' if zip64 else '<LLL'
1237            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1238                  zinfo.file_size))
1239        self.fp.flush()
1240        self.filelist.append(zinfo)
1241        self.NameToInfo[zinfo.filename] = zinfo
1242
1243    def __del__(self):
1244        """Call the "close()" method in case the user forgot."""
1245        self.close()
1246
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        The ending records (one central directory entry per member in
        self.filelist, the ZIP64 end records when needed, and the end of
        archive record with the archive comment) are written only when the
        archive was modified.  Calling close() on an archive that is
        already closed does nothing.  The underlying file is closed unless
        it was passed in by the caller (self._filePassed)."""
        if self.fp is None:
            # Already closed.
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                # pos1: file position where the central directory starts
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    # Pack (year, month, day, hour, min, sec) into the two
                    # date/time fields; seconds are stored halved.
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # 'extra' collects the values that exceed ZIP64_LIMIT;
                    # their regular fields get the 0xffffffff placeholder.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffffL
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Put a ZIP64 field (header id 1, then one 8-byte
                        # value per collected item) in front of the
                        # entry's existing extra data
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        # An entry carrying a ZIP64 field is marked as
                        # version 45 or higher.
                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Dump the values that were being packed to stderr,
                        # then re-raise.
                        # NOTE(review): presumably only reachable when
                        # warnings are turned into errors and struct.pack
                        # warns about a value -- confirm.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    # One central directory entry: fixed-size record, then
                    # file name, extra data and member comment.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                # pos2: file position just past the central directory
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # The locator records pos2, the position at which the
                    # ZIP64 end record above was written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp the values to what the fields of the regular
                    # end record can hold.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Mark the archive as closed even when writing the records
            # failed, so that a later close() returns immediately.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                # Only a file this object opened itself is closed here.
                fp.close()
1351
1352
1353class PyZipFile(ZipFile):
1354    """Class to create ZIP archives with Python library files and packages."""
1355
1356    def writepy(self, pathname, basename = ""):
1357        """Add all files from "pathname" to the ZIP archive.
1358
1359        If pathname is a package directory, search the directory and
1360        all package subdirectories recursively for all *.py and enter
1361        the modules into the archive.  If pathname is a plain
1362        directory, listdir *.py and enter all modules.  Else, pathname
1363        must be a Python *.py file and the module will be put into the
1364        archive.  Added modules are always module.pyo or module.pyc.
1365        This method will compile the module.py into module.pyc if
1366        necessary.
1367        """
1368        dir, name = os.path.split(pathname)
1369        if os.path.isdir(pathname):
1370            initname = os.path.join(pathname, "__init__.py")
1371            if os.path.isfile(initname):
1372                # This is a package directory, add it
1373                if basename:
1374                    basename = "%s/%s" % (basename, name)
1375                else:
1376                    basename = name
1377                if self.debug:
1378                    print "Adding package in", pathname, "as", basename
1379                fname, arcname = self._get_codename(initname[0:-3], basename)
1380                if self.debug:
1381                    print "Adding", arcname
1382                self.write(fname, arcname)
1383                dirlist = os.listdir(pathname)
1384                dirlist.remove("__init__.py")
1385                # Add all *.py files and package subdirectories
1386                for filename in dirlist:
1387                    path = os.path.join(pathname, filename)
1388                    root, ext = os.path.splitext(filename)
1389                    if os.path.isdir(path):
1390                        if os.path.isfile(os.path.join(path, "__init__.py")):
1391                            # This is a package directory, add it
1392                            self.writepy(path, basename)  # Recursive call
1393                    elif ext == ".py":
1394                        fname, arcname = self._get_codename(path[0:-3],
1395                                         basename)
1396                        if self.debug:
1397                            print "Adding", arcname
1398                        self.write(fname, arcname)
1399            else:
1400                # This is NOT a package directory, add its files at top level
1401                if self.debug:
1402                    print "Adding files from directory", pathname
1403                for filename in os.listdir(pathname):
1404                    path = os.path.join(pathname, filename)
1405                    root, ext = os.path.splitext(filename)
1406                    if ext == ".py":
1407                        fname, arcname = self._get_codename(path[0:-3],
1408                                         basename)
1409                        if self.debug:
1410                            print "Adding", arcname
1411                        self.write(fname, arcname)
1412        else:
1413            if pathname[-3:] != ".py":
1414                raise RuntimeError, \
1415                      'Files added with writepy() must end with ".py"'
1416            fname, arcname = self._get_codename(pathname[0:-3], basename)
1417            if self.debug:
1418                print "Adding file", arcname
1419            self.write(fname, arcname)
1420
1421    def _get_codename(self, pathname, basename):
1422        """Return (filename, archivename) for the path.
1423
1424        Given a module name path, return the correct file path and
1425        archive name, compiling if necessary.  For example, given
1426        /python/lib/string, return (/python/lib/string.pyc, string).
1427        """
1428        file_py  = pathname + ".py"
1429        file_pyc = pathname + ".pyc"
1430        file_pyo = pathname + ".pyo"
1431        if os.path.isfile(file_pyo) and \
1432                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1433            fname = file_pyo    # Use .pyo file
1434        elif not os.path.isfile(file_pyc) or \
1435             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1436            import py_compile
1437            if self.debug:
1438                print "Compiling", file_py
1439            try:
1440                py_compile.compile(file_py, file_pyc, None, True)
1441            except py_compile.PyCompileError,err:
1442                print err.msg
1443            fname = file_pyc
1444        else:
1445            fname = file_pyc
1446        archivename = os.path.split(fname)[1]
1447        if basename:
1448            archivename = "%s/%s" % (basename, archivename)
1449        return (fname, archivename)
1450
1451
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the list of command-line arguments without the program name;
    sys.argv[1:] is used when it is None.  The first argument selects the
    action (-l, -t, -e or -c).  On a missing or unknown action, or a wrong
    number of arguments, the usage text is printed and the process exits
    with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        with ZipFile(args[1], 'r') as zf:
            # extractall() creates directory entries as directories and
            # strips drive letters, leading separators, "." and ".."
            # components from member names, so nothing is written outside
            # the target directory.
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the files below a directory;
            # anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
1521
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()
1524