10a8c90248264a8b26970b4473770bcc3df8515fJosh Gao""" 20a8c90248264a8b26970b4473770bcc3df8515fJosh GaoRead and write ZIP files. 30a8c90248264a8b26970b4473770bcc3df8515fJosh Gao""" 40a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport struct, os, time, sys, shutil 50a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport binascii, cStringIO, stat 60a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport io 70a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport re 80a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport string 90a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 100a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry: 110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import zlib # We may need its compression method 120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao crc32 = zlib.crc32 130a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept ImportError: 140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zlib = None 150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao crc32 = binascii.crc32 160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", 180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] 190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 200a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass BadZipfile(Exception): 210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 240a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass LargeZipFile(Exception): 250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Raised when writing a zipfile, the zipfile requires ZIP64 extensions 270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao and those extensions are disabled. 280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 300a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoerror = BadZipfile # The exception raised by this module 310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 320a8c90248264a8b26970b4473770bcc3df8515fJosh GaoZIP64_LIMIT = (1 << 31) - 1 330a8c90248264a8b26970b4473770bcc3df8515fJosh GaoZIP_FILECOUNT_LIMIT = 1 << 16 340a8c90248264a8b26970b4473770bcc3df8515fJosh GaoZIP_MAX_COMMENT = (1 << 16) - 1 350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# constants for Zip file compression methods 370a8c90248264a8b26970b4473770bcc3df8515fJosh GaoZIP_STORED = 0 380a8c90248264a8b26970b4473770bcc3df8515fJosh GaoZIP_DEFLATED = 8 390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Other ZIP compression methods not supported 400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Below are some formats and associated data for reading/writing headers using 420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# the struct module. The names and structures of headers/records are those used 430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# in the PKWARE description of the ZIP file format: 440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# (URL valid as of January 2008) 460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The "end of central directory" structure, magic number, size, and indices 480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# (section V.I in the format document) 490a8c90248264a8b26970b4473770bcc3df8515fJosh GaostructEndArchive = "<4s4H2LH" 500a8c90248264a8b26970b4473770bcc3df8515fJosh GaostringEndArchive = "PK\005\006" 510a8c90248264a8b26970b4473770bcc3df8515fJosh GaosizeEndCentDir = struct.calcsize(structEndArchive) 520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_SIGNATURE = 0 540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_DISK_NUMBER = 1 550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_DISK_START = 2 560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_ENTRIES_THIS_DISK = 3 570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_ENTRIES_TOTAL = 4 580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_SIZE = 5 590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_OFFSET = 6 600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_COMMENT_SIZE = 7 610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# These last two indices are not part of the structure as defined in the 620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# spec, but they are used internally by this module as a convenience 630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_COMMENT = 8 640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ECD_LOCATION = 9 650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The "central directory" structure, magic number, size, and indices 670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# of entries in the structure (section V.F in the format document) 680a8c90248264a8b26970b4473770bcc3df8515fJosh GaostructCentralDir = "<4s4B4HL2L5H2L" 690a8c90248264a8b26970b4473770bcc3df8515fJosh GaostringCentralDir = "PK\001\002" 700a8c90248264a8b26970b4473770bcc3df8515fJosh GaosizeCentralDir = struct.calcsize(structCentralDir) 710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# indexes of entries in the central directory structure 730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_SIGNATURE = 0 740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_CREATE_VERSION = 1 750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_CREATE_SYSTEM = 2 760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_EXTRACT_VERSION = 3 770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_EXTRACT_SYSTEM = 4 780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_FLAG_BITS = 5 790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_COMPRESS_TYPE = 6 800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_TIME = 7 810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_DATE = 8 820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_CRC = 9 830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_COMPRESSED_SIZE = 10 840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_UNCOMPRESSED_SIZE = 11 850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_FILENAME_LENGTH = 12 860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_EXTRA_FIELD_LENGTH = 13 870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_COMMENT_LENGTH = 14 880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_DISK_NUMBER_START = 15 890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_INTERNAL_FILE_ATTRIBUTES = 16 900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_EXTERNAL_FILE_ATTRIBUTES = 17 910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD_LOCAL_HEADER_OFFSET = 18 920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The "local file header" structure, magic number, size, and indices 940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# (section V.A in the format document) 950a8c90248264a8b26970b4473770bcc3df8515fJosh GaostructFileHeader = "<4s2B4HL2L2H" 960a8c90248264a8b26970b4473770bcc3df8515fJosh GaostringFileHeader = "PK\003\004" 970a8c90248264a8b26970b4473770bcc3df8515fJosh GaosizeFileHeader = struct.calcsize(structFileHeader) 980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_SIGNATURE = 0 1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_EXTRACT_VERSION = 1 1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_EXTRACT_SYSTEM = 2 1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_GENERAL_PURPOSE_FLAG_BITS = 3 1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_COMPRESSION_METHOD = 4 1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_LAST_MOD_TIME = 5 1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_LAST_MOD_DATE = 6 1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_CRC = 7 1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_COMPRESSED_SIZE = 8 1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_UNCOMPRESSED_SIZE = 9 1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_FILENAME_LENGTH = 10 1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_FH_EXTRA_FIELD_LENGTH = 11 1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The "Zip64 end of central directory locator" structure, magic number, and size 1130a8c90248264a8b26970b4473770bcc3df8515fJosh GaostructEndArchive64Locator = "<4sLQL" 1140a8c90248264a8b26970b4473770bcc3df8515fJosh GaostringEndArchive64Locator = "PK\x06\x07" 1150a8c90248264a8b26970b4473770bcc3df8515fJosh GaosizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The "Zip64 end of central directory" record, magic number, size, and indices 1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# (section V.G in the format document) 1190a8c90248264a8b26970b4473770bcc3df8515fJosh GaostructEndArchive64 = "<4sQ2H2L4Q" 1200a8c90248264a8b26970b4473770bcc3df8515fJosh GaostringEndArchive64 = "PK\x06\x06" 1210a8c90248264a8b26970b4473770bcc3df8515fJosh GaosizeEndCentDir64 = struct.calcsize(structEndArchive64) 1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_SIGNATURE = 0 1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_DIRECTORY_RECSIZE = 1 1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_CREATE_VERSION = 2 1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_EXTRACT_VERSION = 3 1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_DISK_NUMBER = 4 1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_DISK_NUMBER_START = 5 1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_NUMBER_ENTRIES_THIS_DISK = 6 1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_NUMBER_ENTRIES_TOTAL = 7 1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_DIRECTORY_SIZE = 8 1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_CD64_OFFSET_START_CENTDIR = 9 1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _check_zipfile(fp): 1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if _EndRecData(fp): 1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return True # file has correct magic number 1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return False 1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef is_zipfile(filename): 1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Quickly see if a file is a ZIP file by checking the magic number. 1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao The filename argument may be a file or file-like object too. 1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = False 1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(filename, "read"): 1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = _check_zipfile(fp=filename) 1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with open(filename, "rb") as fp: 1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao result = _check_zipfile(fp) 1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return result 1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _EndRecData64(fpin, offset, endrec): 1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Read the ZIP64 end-of-archive records and use that to update endrec 1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fpin.seek(offset - sizeEndCentDir64Locator, 2) 1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If the seek fails, the file is not large enough to contain a ZIP64 1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # end-of-archive record, so just return the end record we were given. 1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return endrec 1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = fpin.read(sizeEndCentDir64Locator) 1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(data) != sizeEndCentDir64Locator: 1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return endrec 1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sig != stringEndArchive64Locator: 1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return endrec 1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if diskno != 0 or disks != 1: 1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile("zipfiles that span multiple disks are not supported") 1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Assume no 'zip64 extensible data' 1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = fpin.read(sizeEndCentDir64) 1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(data) != sizeEndCentDir64: 1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return endrec 1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sig, sz, create_version, read_version, disk_num, disk_dir, \ 1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dircount, dircount2, dirsize, diroffset = \ 1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao struct.unpack(structEndArchive64, data) 1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sig != stringEndArchive64: 1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return endrec 1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Update the original endrec using data from the ZIP64 record 1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec[_ECD_SIGNATURE] = sig 1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec[_ECD_DISK_NUMBER] = disk_num 1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec[_ECD_DISK_START] = disk_dir 1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec[_ECD_ENTRIES_THIS_DISK] = dircount 1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec[_ECD_ENTRIES_TOTAL] = dircount2 1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec[_ECD_SIZE] = dirsize 1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec[_ECD_OFFSET] = diroffset 1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return endrec 1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _EndRecData(fpin): 2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return data from the "End of Central Directory" record, or None. 2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao The data is a list of the nine items in the ZIP "End of central dir" 2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao record followed by a tenth item, the file seek offset of this record.""" 2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Determine file size 2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fpin.seek(0, 2) 2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filesize = fpin.tell() 2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Check to see if this is ZIP file with no archive comment (the 2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "end of central directory" structure should be the last item in the 2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # file if this is the case). 2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fpin.seek(-sizeEndCentDir, 2) 2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None 2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = fpin.read() 2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if (len(data) == sizeEndCentDir and 2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data[0:4] == stringEndArchive and 2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data[-2:] == b"\000\000"): 2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # the signature is correct and there's no comment, unpack structure 2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec = struct.unpack(structEndArchive, data) 2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec=list(endrec) 2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Append a blank comment and record start offset 2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec.append("") 2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec.append(filesize - sizeEndCentDir) 2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Try to read the "Zip64 end of central directory" structure 2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _EndRecData64(fpin, -sizeEndCentDir, endrec) 2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Either this is not a ZIP file, or it is a ZIP file with an archive 2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # comment. Search the end of the file for the "end of central directory" 2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # record signature. The comment is the last item in the ZIP file and may be 2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # up to 64K long. It is assumed that the "end of central directory" magic 2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # number does not appear in the comment. 2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fpin.seek(maxCommentStart, 0) 2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = fpin.read() 2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao start = data.rfind(stringEndArchive) 2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if start >= 0: 2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # found the magic number; attempt to unpack and interpret 2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao recData = data[start:start+sizeEndCentDir] 2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(recData) != sizeEndCentDir: 2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Zip file is corrupted. 2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None 2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec = list(struct.unpack(structEndArchive, recData)) 2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec.append(comment) 2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec.append(maxCommentStart + start) 2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Try to read the "Zip64 end of central directory" structure 2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return _EndRecData64(fpin, maxCommentStart + start - filesize, 2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec) 2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Unable to find a valid end of central directory structure 2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None 2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ZipInfo (object): 2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Class with attributes describing each file in the ZIP archive.""" 2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao __slots__ = ( 2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'orig_filename', 2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'filename', 2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'date_time', 2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'compress_type', 2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'comment', 2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'extra', 2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'create_system', 2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'create_version', 2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'extract_version', 2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'reserved', 2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'flag_bits', 2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'volume', 2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'internal_attr', 2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'external_attr', 2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'header_offset', 2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'CRC', 2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'compress_size', 2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'file_size', 2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao '_raw_time', 2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.orig_filename = filename # Original file name in archive 2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Terminate the file name at the first null byte. Null bytes in file 2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # names are used as tricks by viruses in archives. 2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao null_byte = filename.find(chr(0)) 2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if null_byte >= 0: 2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filename = filename[0:null_byte] 2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This is used to ensure paths in generated ZIP files always use 2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # forward slashes as the directory separator, as required by the 2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # ZIP format specification. 2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.sep != "/" and os.sep in filename: 2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filename = filename.replace(os.sep, "/") 3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filename = filename # Normalized file name 3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.date_time = date_time # year, month, day, hour, min, sec 3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if date_time[0] < 1980: 3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError('ZIP does not support timestamps before 1980') 3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Standard values: 3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.compress_type = ZIP_STORED # Type of compression for the file 3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.comment = "" # Comment for each file 3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.extra = "" # ZIP extra data 3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.platform == 'win32': 3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.create_system = 0 # System which created ZIP archive 3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Assume everything else is unix-y 3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.create_system = 3 # System which created ZIP archive 3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.create_version = 20 # Version which created ZIP archive 3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.extract_version = 20 # Version needed to extract archive 3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.reserved = 0 # Must be zero 3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.flag_bits = 0 # ZIP flag bits 3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.volume = 0 # Volume number of file header 3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.internal_attr = 0 # Internal attributes 3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.external_attr = 0 # External file attributes 3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Other attributes are set by class ZipFile: 3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # header_offset Byte offset to the file header 3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # CRC CRC-32 of the uncompressed file 3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # compress_size Size of the compressed file 3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # file_size Size of the uncompressed file 3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def FileHeader(self, zip64=None): 3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the per-file header as a string.""" 3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dt = self.date_time 3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.flag_bits & 0x08: 3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Set these to zero because we write them after the file data 3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao CRC = compress_size = file_size = 0 3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao CRC = self.CRC 3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compress_size = self.compress_size 3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_size = self.file_size 3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra = self.extra 3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zip64 is None: 3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zip64: 3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fmt = '<HHQQ' 3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra = extra + struct.pack(fmt, 3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1, struct.calcsize(fmt)-4, file_size, compress_size) 3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not zip64: 3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise LargeZipFile("Filesize would require ZIP64 extensions") 3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # File is larger than what fits into a 4 byte integer, 3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # fall back to the ZIP64 extension 3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_size = 0xffffffff 3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compress_size = 0xffffffff 3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.extract_version = max(45, self.extract_version) 3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.create_version = max(45, self.extract_version) 3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filename, flag_bits = self._encodeFilenameFlags() 3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao header = struct.pack(structFileHeader, stringFileHeader, 3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.extract_version, self.reserved, flag_bits, 3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.compress_type, dostime, dosdate, CRC, 3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compress_size, file_size, 3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao len(filename), len(extra)) 3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return header + filename + extra 3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _encodeFilenameFlags(self): 3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(self.filename, unicode): 3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.filename.encode('ascii'), self.flag_bits 3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except UnicodeEncodeError: 3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.filename.encode('utf-8'), self.flag_bits | 0x800 3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.filename, self.flag_bits 3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _decodeFilename(self): 3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.flag_bits & 0x800: 3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.filename.decode('utf-8') 3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.filename 3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _decodeExtra(self): 3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Try to decode the extra field. 3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra = self.extra 3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao unpack = struct.unpack 3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while extra: 3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tp, ln = unpack('<HH', extra[:4]) 3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tp == 1: 3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if ln >= 24: 3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao counts = unpack('<QQQ', extra[4:28]) 3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif ln == 16: 3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao counts = unpack('<QQ', extra[4:20]) 3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif ln == 8: 3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao counts = unpack('<Q', extra[4:12]) 3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif ln == 0: 3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao counts = () 3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, "Corrupt extra field %s"%(ln,) 4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao idx = 0 4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # ZIP64 extension (large files and/or large archives) 4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.file_size in (0xffffffffffffffffL, 0xffffffffL): 4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.file_size = counts[idx] 4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao idx += 1 4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.compress_size == 0xFFFFFFFFL: 4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.compress_size = counts[idx] 4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao idx += 1 4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.header_offset == 0xffffffffL: 4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao old = self.header_offset 4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.header_offset = counts[idx] 4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao idx+=1 4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra = extra[ln+4:] 4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _ZipDecrypter: 4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Class to handle decryption of files stored within a ZIP archive. 4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ZIP supports a password-based form of encryption. Even though known 4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao plaintext attacks have been found against it, it is still useful 4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao to be able to get data out of such a file. 4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Usage: 4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zd = _ZipDecrypter(mypwd) 4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao plain_char = zd(cypher_char) 4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao plain_text = map(zd, cypher_text) 4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _GenerateCRCTable(): 4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Generate a CRC-32 table. 4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ZIP encryption uses the CRC32 one-byte primitive for scrambling some 4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao internal keys. We noticed that a direct implementation is faster than 4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao relying on binascii.crc32(). 4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao poly = 0xedb88320 4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao table = [0] * 256 4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for i in range(256): 4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao crc = i 4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for j in range(8): 4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if crc & 1: 4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly 4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao crc = ((crc >> 1) & 0x7FFFFFFF) 4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao table[i] = crc 4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return table 4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao crctable = _GenerateCRCTable() 4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _crc32(self, ch, crc): 4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Compute the CRC32 primitive on one byte.""" 4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] 4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, pwd): 4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key0 = 305419896 4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key1 = 591751049 4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key2 = 878082192 4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for p in pwd: 4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._UpdateKeys(p) 4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _UpdateKeys(self, c): 4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key0 = self._crc32(c, self.key0) 4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key1 = (self.key1 * 134775813 + 1) & 4294967295 4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) 4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __call__(self, c): 4710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Decrypt a single character.""" 4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao c = ord(c) 4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao k = self.key2 | 2 4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao c = c ^ (((k * (k^1)) >> 8) & 255) 4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao c = chr(c) 4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._UpdateKeys(c) 4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return c 4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gaocompressor_names = { 4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 0: 'store', 4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1: 'shrink', 4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2: 'reduce', 4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3: 'reduce', 4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 4: 'reduce', 4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5: 'reduce', 4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6: 'implode', 4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7: 'tokenize', 4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8: 'deflate', 4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9: 'deflate64', 4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10: 'implode', 4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12: 'bzip2', 4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14: 'lzma', 4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18: 'terse', 4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19: 'lz77', 4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 97: 'wavpack', 4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 98: 'ppmd', 4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao} 4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ZipExtFile(io.BufferedIOBase): 5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """File-like object for reading an archive member. 5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Is returned by ZipFile.open(). 5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Max size supported by decompressor. 5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao MAX_N = 1 << 31 - 1 5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Read from compressed files in 4k blocks. 5100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao MIN_READ_SIZE = 4096 5110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Search for universal newlines or line chunks. 5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') 5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, fileobj, mode, zipinfo, decrypter=None, 5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao close_fileobj=False): 5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._fileobj = fileobj 5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._decrypter = decrypter 5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._close_fileobj = close_fileobj 5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._compress_type = zipinfo.compress_type 5220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._compress_size = zipinfo.compress_size 5230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._compress_left = zipinfo.compress_size 5240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._compress_type == ZIP_DEFLATED: 5260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._decompressor = zlib.decompressobj(-15) 5270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif self._compress_type != ZIP_STORED: 5280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao descr = compressor_names.get(self._compress_type) 5290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if descr: 5300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr)) 5310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 5320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise NotImplementedError("compression type %d" % (self._compress_type,)) 5330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._unconsumed = '' 5340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._readbuffer = '' 5360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset = 0 5370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._universal = 'U' in mode 5390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.newlines = None 5400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Adjust read size for encrypted files since the first 12 bytes 5420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # are for the encryption/password information. 5430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._decrypter is not None: 5440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._compress_left -= 12 5450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.mode = mode 5470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.name = zipinfo.filename 5480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(zipinfo, 'CRC'): 5500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._expected_crc = zipinfo.CRC 5510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._running_crc = crc32(b'') & 0xffffffff 5520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 5530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._expected_crc = None 5540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def readline(self, limit=-1): 5560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Read and return a line from the stream. 5570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If limit is specified, at most limit bytes will be read. 5590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 5600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._universal and limit < 0: 5620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Shortcut common case - newline found in buffer. 5630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao i = self._readbuffer.find('\n', self._offset) + 1 5640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if i > 0: 5650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao line = self._readbuffer[self._offset: i] 5660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset = i 5670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return line 5680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._universal: 5700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return io.BufferedIOBase.readline(self, limit) 5710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao line = '' 5730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while limit < 0 or len(line) < limit: 5740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao readahead = self.peek(2) 5750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if readahead == '': 5760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return line 5770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 5790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Search for universal newlines or line chunks. 5800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 5810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # The pattern returns either a line chunk or a newline, but not 5820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # both. Combined with peek(2), we are assured that the sequence 5830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # '\r\n' is always retrieved completely and never split into 5840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # separate newlines - '\r', '\n' due to coincidental readaheads. 5850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 5860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = self.PATTERN.search(readahead) 5870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao newline = match.group('newline') 5880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if newline is not None: 5890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.newlines is None: 5900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.newlines = [] 5910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if newline not in self.newlines: 5920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.newlines.append(newline) 5930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset += len(newline) 5940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return line + '\n' 5950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 5960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao chunk = match.group('chunk') 5970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if limit >= 0: 5980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao chunk = chunk[: limit - len(line)] 5990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset += len(chunk) 6010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao line += chunk 6020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return line 6040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def peek(self, n=1): 6060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Returns buffered bytes without advancing the position.""" 6070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if n > len(self._readbuffer) - self._offset: 6080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao chunk = self.read(n) 6090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset -= len(chunk) 6100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Return up to 512 bytes to reduce allocation overhead for tight loops. 6120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._readbuffer[self._offset: self._offset + 512] 6130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def readable(self): 6150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return True 6160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def read(self, n=-1): 6180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Read and return up to n bytes. 6190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. 6200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 6210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = '' 6220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if n is None: 6230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao n = -1 6240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 6250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if n < 0: 6260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = self.read1(n) 6270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif n > len(buf): 6280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = self.read1(n - len(buf)) 6290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 6300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return buf 6310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(data) == 0: 6320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return buf 6330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf += data 6340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _update_crc(self, newdata, eof): 6360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Update the CRC using the given data. 6370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._expected_crc is None: 6380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # No need to compute the CRC if we don't have a reference value 6390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 6400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff 6410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Check the CRC if we're at the end of the file 6420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if eof and self._running_crc != self._expected_crc: 6430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile("Bad CRC-32 for file %r" % self.name) 6440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def read1(self, n): 6460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Read up to n bytes with at most one read() system call.""" 6470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Simplify algorithm (branching) by transforming negative n to large n. 6490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if n < 0 or n is None: 6500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao n = self.MAX_N 6510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Bytes available in read buffer. 6530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao len_readbuffer = len(self._readbuffer) - self._offset 6540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Read from file. 6560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): 6570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao nbytes = n - len_readbuffer - len(self._unconsumed) 6580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao nbytes = max(nbytes, self.MIN_READ_SIZE) 6590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao nbytes = min(nbytes, self._compress_left) 6600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = self._fileobj.read(nbytes) 6620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._compress_left -= len(data) 6630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if data and self._decrypter is not None: 6650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = ''.join(map(self._decrypter, data)) 6660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._compress_type == ZIP_STORED: 6680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._update_crc(data, eof=(self._compress_left==0)) 6690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._readbuffer = self._readbuffer[self._offset:] + data 6700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset = 0 6710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 6720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Prepare deflated bytes for decompression. 6730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._unconsumed += data 6740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Handle unconsumed data. 6760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if (len(self._unconsumed) > 0 and n > len_readbuffer and 6770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._compress_type == ZIP_DEFLATED): 6780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = self._decompressor.decompress( 6790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._unconsumed, 6800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao max(n - len_readbuffer, self.MIN_READ_SIZE) 6810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ) 6820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._unconsumed = self._decompressor.unconsumed_tail 6840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao eof = len(self._unconsumed) == 0 and self._compress_left == 0 6850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if eof: 6860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data += self._decompressor.flush() 6870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._update_crc(data, eof=eof) 6890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._readbuffer = self._readbuffer[self._offset:] + data 6900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset = 0 6910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Read from buffer. 6930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = self._readbuffer[self._offset: self._offset + n] 6940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._offset += len(data) 6950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return data 6960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 6970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 6980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try : 6990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._close_fileobj: 7000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._fileobj.close() 7010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 7020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao super(ZipExtFile, self).close() 7030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7050a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass ZipFile(object): 7060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ Class with methods to open, read, write, close, list zip files. 7070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) 7090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file: Either the path to the file, or a file-like object. 7110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If it is a path, the file will be opened and closed by ZipFile. 7120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode: The mode can be either read "r", write "w" or append "a". 7130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). 7140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao allowZip64: if True ZipFile will create files with ZIP64 extensions when 7150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao needed, otherwise it will raise an exception when this would 7160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao be necessary. 7170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 7190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = None # Set here since __del__ checks it 7210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): 7230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Open the ZIP file with mode read "r", write "w" or append "a".""" 7240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mode not in ("r", "w", "a"): 7250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') 7260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if compression == ZIP_STORED: 7280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 7290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif compression == ZIP_DEFLATED: 7300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not zlib: 7310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError,\ 7320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Compression requires the (missing) zlib module" 7330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 7340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, "That compression method is not supported" 7350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._allowZip64 = allowZip64 7370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._didModify = False 7380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.debug = 0 # Level of printing: 0 through 3 7390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.NameToInfo = {} # Find file info given name 7400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filelist = [] # List of ZipInfo instances for archive 7410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.compression = compression # Method of compression 7420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.mode = key = mode.replace('b', '')[0] 7430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.pwd = None 7440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._comment = '' 7450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Check if we were passed a file-like object 7470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(file, basestring): 7480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._filePassed = 0 7490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filename = file 7500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} 7510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 7520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp = open(file, modeDict[mode]) 7530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 7540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mode == 'a': 7550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode = key = 'w' 7560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp = open(file, modeDict[mode]) 7570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 7580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 7590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 7600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._filePassed = 1 7610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp = file 7620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filename = getattr(file, 'name', None) 7630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 7650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if key == 'r': 7660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._RealGetContents() 7670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif key == 'w': 7680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # set the modified flag so central directory gets written 7690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # even if no files are added to the archive 7700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._didModify = True 7710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif key == 'a': 7720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 7730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # See if file is a zip file 7740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._RealGetContents() 7750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # seek to start of directory and overwrite 7760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.seek(self.start_dir, 0) 7770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except BadZipfile: 7780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # file is not a zip file, just append 7790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.seek(0, 2) 7800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # set the modified flag so central directory gets written 7820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # even if no files are added to the archive 7830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._didModify = True 7840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 7850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError('Mode must be "r", "w" or "a"') 7860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except: 7870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = self.fp 7880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp = None 7890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._filePassed: 7900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.close() 7910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 7920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __enter__(self): 7940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self 7950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __exit__(self, type, value, traceback): 7970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.close() 7980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 7990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _RealGetContents(self): 8000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Read in the table of contents for the ZIP file.""" 8010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = self.fp 8020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 8030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec = _EndRecData(fp) 8040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 8050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile("File is not a zip file") 8060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not endrec: 8070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile, "File is not a zip file" 8080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug > 1: 8090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print endrec 8100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao size_cd = endrec[_ECD_SIZE] # bytes in central directory 8110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao offset_cd = endrec[_ECD_OFFSET] # offset of central directory 8120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._comment = endrec[_ECD_COMMENT] # archive comment 8130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "concat" is zero, unless zip was concatenated to another file 8150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 8160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if endrec[_ECD_SIGNATURE] == stringEndArchive64: 8170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If Zip64 extension structures are present, account for them 8180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 8190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug > 2: 8210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao inferred = concat + offset_cd 8220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "given, inferred, offset", offset_cd, inferred, concat 8230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # self.start_dir: Position of start of central directory 8240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.start_dir = offset_cd + concat 8250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.seek(self.start_dir, 0) 8260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao data = fp.read(size_cd) 8270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = cStringIO.StringIO(data) 8280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao total = 0 8290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while total < size_cd: 8300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centdir = fp.read(sizeCentralDir) 8310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(centdir) != sizeCentralDir: 8320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile("Truncated central directory") 8330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centdir = struct.unpack(structCentralDir, centdir) 8340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if centdir[_CD_SIGNATURE] != stringCentralDir: 8350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile("Bad magic number for central directory") 8360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug > 2: 8370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print centdir 8380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 8390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create ZipInfo instance to store file information 8400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x = ZipInfo(filename) 8410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 8420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 8430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 8440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (x.create_version, x.create_system, x.extract_version, x.reserved, 8450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.flag_bits, x.compress_type, t, d, 8460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.CRC, x.compress_size, x.file_size) = centdir[1:12] 8470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.volume, x.internal_attr, x.external_attr = centdir[15:18] 8480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Convert date/time code to (year, month, day, hour, min, sec) 8490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x._raw_time = t 8500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 8510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 8520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x._decodeExtra() 8540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.header_offset = x.header_offset + concat 8550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao x.filename = x._decodeFilename() 8560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filelist.append(x) 8570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.NameToInfo[x.filename] = x 8580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # update total bytes read from central directory 8600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 8610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao + centdir[_CD_EXTRA_FIELD_LENGTH] 8620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao + centdir[_CD_COMMENT_LENGTH]) 8630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug > 2: 8650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "total", total 8660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def namelist(self): 8690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a list of file names in the archive.""" 8700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = [] 8710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for data in self.filelist: 8720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l.append(data.filename) 8730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return l 8740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def infolist(self): 8760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a list of class ZipInfo instances for files in the 8770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao archive.""" 8780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.filelist 8790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def printdir(self): 8810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Print a table of contents for the zip file.""" 8820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "%-46s %19s %12s" % ("File Name", "Modified ", "Size") 8830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for zinfo in self.filelist: 8840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 8850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) 8860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def testzip(self): 8880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Read all the files and check the CRC.""" 8890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao chunk_size = 2 ** 20 8900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for zinfo in self.filelist: 8910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 8920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Read by chunks, to avoid an OverflowError or a 8930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # MemoryError with very large embedded files. 8940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with self.open(zinfo.filename, "r") as f: 8950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while f.read(chunk_size): # Check CRC-32 8960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 8970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except BadZipfile: 8980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return zinfo.filename 8990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getinfo(self, name): 9010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the instance of ZipInfo given 'name'.""" 9020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = self.NameToInfo.get(name) 9030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if info is None: 9040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise KeyError( 9050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'There is no item named %r in the archive' % name) 9060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return info 9080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def setpassword(self, pwd): 9100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Set default password for encrypted files.""" 9110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.pwd = pwd 9120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @property 9140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def comment(self): 9150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """The comment text associated with the ZIP file.""" 9160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._comment 9170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @comment.setter 9190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def comment(self, comment): 9200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check for valid comment length 9210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(comment) >= ZIP_MAX_COMMENT: 9220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 9230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print('Archive comment is too long; truncating to %d bytes' 9240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao % ZIP_MAX_COMMENT) 9250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao comment = comment[:ZIP_MAX_COMMENT] 9260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._comment = comment 9270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._didModify = True 9280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def read(self, name, pwd=None): 9300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return file bytes (as a string) for name.""" 9310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.open(name, "r", pwd).read() 9320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open(self, name, mode="r", pwd=None): 9340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return file-like object for 'name'.""" 9350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mode not in ("r", "U", "rU"): 9360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, 'open() requires mode "r", "U", or "rU"' 9370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self.fp: 9380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, \ 9390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Attempt to read ZIP archive that was already closed" 9400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Only open a new file for instances where we were not 9420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # given a file object in the constructor 9430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._filePassed: 9440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zef_file = self.fp 9450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao should_close = False 9460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zef_file = open(self.filename, 'rb') 9480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao should_close = True 9490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 9510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Make sure we have an info object 9520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(name, ZipInfo): 9530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 'name' is already an info object 9540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo = name 9550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 9560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Get info object for name 9570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo = self.getinfo(name) 9580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zef_file.seek(zinfo.header_offset, 0) 9600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Skip the file header: 9620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fheader = zef_file.read(sizeFileHeader) 9630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(fheader) != sizeFileHeader: 9640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile("Truncated file header") 9650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fheader = struct.unpack(structFileHeader, fheader) 9660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fheader[_FH_SIGNATURE] != stringFileHeader: 9670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile("Bad magic number for file header") 9680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 9700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fheader[_FH_EXTRA_FIELD_LENGTH]: 9710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 9720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fname != zinfo.orig_filename: 9740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise BadZipfile, \ 9750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'File name in directory "%s" and header "%s" differ.' % ( 9760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.orig_filename, fname) 9770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # check for encrypted flag & handle password 9790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao is_encrypted = zinfo.flag_bits & 0x1 9800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zd = None 9810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if is_encrypted: 9820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not pwd: 9830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pwd = self.pwd 9840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not pwd: 9850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, "File %s is encrypted, " \ 9860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "password required for extraction" % name 9870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zd = _ZipDecrypter(pwd) 9890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # The first 12 bytes in the cypher stream is an encryption header 9900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # used to strengthen the algorithm. The first 11 bytes are 9910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # completely random, while the 12th contains the MSB of the CRC, 9920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # or the MSB of the file time depending on the header type 9930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and is used to check the correctness of the password. 9940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao bytes = zef_file.read(12) 9950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao h = map(zd, bytes[0:12]) 9960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.flag_bits & 0x8: 9970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # compare against the file type from extended local headers 9980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao check_byte = (zinfo._raw_time >> 8) & 0xff 9990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 10000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # compare against the CRC otherwise 10010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao check_byte = (zinfo.CRC >> 24) & 0xff 10020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if ord(h[11]) != check_byte: 10030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError("Bad password for file", name) 10040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return ZipExtFile(zef_file, mode, zinfo, zd, 10060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao close_fileobj=should_close) 10070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except: 10080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if should_close: 10090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zef_file.close() 10100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 10110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def extract(self, member, path=None, pwd=None): 10130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Extract a member from the archive to the current working directory, 10140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao using its full name. Its file information is extracted as accurately 10150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao as possible. `member' may be a filename or a ZipInfo object. You can 10160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao specify a different directory using `path'. 10170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(member, ZipInfo): 10190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao member = self.getinfo(member) 10200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if path is None: 10220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = os.getcwd() 10230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._extract_member(member, path, pwd) 10250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def extractall(self, path=None, members=None, pwd=None): 10270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Extract all members from the archive to the current working 10280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directory. `path' specifies a different directory to extract to. 10290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao `members' is optional and must be a subset of the list returned 10300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao by namelist(). 10310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if members is None: 10330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao members = self.namelist() 10340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for zipinfo in members: 10360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.extract(zipinfo, path, pwd) 10370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _extract_member(self, member, targetpath, pwd): 10390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Extract the ZipInfo object 'member' to a physical 10400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file on the path targetpath. 10410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # build the destination pathname, replacing 10430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # forward slashes to platform specific separators. 10440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = member.filename.replace('/', os.path.sep) 10450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.altsep: 10470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = arcname.replace(os.path.altsep, os.path.sep) 10480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # interpret absolute pathname as relative, remove drive letter or 10490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # UNC path, redundant separators, "." and ".." components. 10500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = os.path.splitdrive(arcname)[1] 10510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 10520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if x not in ('', os.path.curdir, os.path.pardir)) 10530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.sep == '\\': 10540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # filter illegal characters on Windows 10550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao illegal = ':<>|"?*' 10560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(arcname, unicode): 10570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao table = {ord(c): ord('_') for c in illegal} 10580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 10590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao table = string.maketrans(illegal, '_' * len(illegal)) 10600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = arcname.translate(table) 10610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # remove trailing dots 10620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = (x.rstrip('.') for x in arcname.split(os.path.sep)) 10630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = os.path.sep.join(x for x in arcname if x) 10640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao targetpath = os.path.join(targetpath, arcname) 10660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao targetpath = os.path.normpath(targetpath) 10670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create all upper directories if necessary. 10690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao upperdirs = os.path.dirname(targetpath) 10700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if upperdirs and not os.path.exists(upperdirs): 10710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.makedirs(upperdirs) 10720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if member.filename[-1] == '/': 10740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not os.path.isdir(targetpath): 10750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.mkdir(targetpath) 10760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return targetpath 10770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with self.open(member, pwd=pwd) as source, \ 10790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file(targetpath, "wb") as target: 10800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao shutil.copyfileobj(source, target) 10810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return targetpath 10830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _writecheck(self, zinfo): 10850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Check for errors before writing a file to the archive.""" 10860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.filename in self.NameToInfo: 10870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: # Warning for duplicate names 10880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Duplicate name:", zinfo.filename 10890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.mode not in ("w", "a"): 10900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, 'write() requires mode "w" or "a"' 10910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self.fp: 10920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, \ 10930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Attempt to write ZIP archive that was already closed" 10940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.compress_type == ZIP_DEFLATED and not zlib: 10950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, \ 10960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Compression requires the (missing) zlib module" 10970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): 10980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, \ 10990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "That compression method is not supported" 11000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.file_size > ZIP64_LIMIT: 11010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._allowZip64: 11020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise LargeZipFile("Filesize would require ZIP64 extensions") 11030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.header_offset > ZIP64_LIMIT: 11040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._allowZip64: 11050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise LargeZipFile("Zipfile size would require ZIP64 extensions") 11060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def write(self, filename, arcname=None, compress_type=None): 11080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Put the bytes from filename into the archive under the name 11090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname.""" 11100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self.fp: 11110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError( 11120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Attempt to write to ZIP archive that was already closed") 11130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao st = os.stat(filename) 11150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao isdir = stat.S_ISDIR(st.st_mode) 11160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mtime = time.localtime(st.st_mtime) 11170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao date_time = mtime[0:6] 11180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create ZipInfo instance to store file information 11190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if arcname is None: 11200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = filename 11210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 11220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while arcname[0] in (os.sep, os.altsep): 11230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = arcname[1:] 11240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isdir: 11250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname += '/' 11260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo = ZipInfo(arcname, date_time) 11270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes 11280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if compress_type is None: 11290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_type = self.compression 11300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 11310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_type = compress_type 11320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.file_size = st.st_size 11340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.flag_bits = 0x00 11350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.header_offset = self.fp.tell() # Start of header bytes 11360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._writecheck(zinfo) 11380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._didModify = True 11390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isdir: 11410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.file_size = 0 11420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_size = 0 11430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.CRC = 0 11440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filelist.append(zinfo) 11450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.NameToInfo[zinfo.filename] = zinfo 11460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(zinfo.FileHeader(False)) 11470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 11480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with open(filename, "rb") as fp: 11500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Must overwrite CRC and sizes with correct data later 11510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.CRC = CRC = 0 11520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_size = compress_size = 0 11530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Compressed size can be larger than uncompressed size 11540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zip64 = self._allowZip64 and \ 11550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.file_size * 1.05 > ZIP64_LIMIT 11560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(zinfo.FileHeader(zip64)) 11570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.compress_type == ZIP_DEFLATED: 11580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 11590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zlib.DEFLATED, -15) 11600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 11610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cmpr = None 11620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_size = 0 11630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while 1: 11640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = fp.read(1024 * 8) 11650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not buf: 11660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 11670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_size = file_size + len(buf) 11680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao CRC = crc32(buf, CRC) & 0xffffffff 11690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if cmpr: 11700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = cmpr.compress(buf) 11710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compress_size = compress_size + len(buf) 11720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(buf) 11730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if cmpr: 11740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = cmpr.flush() 11750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compress_size = compress_size + len(buf) 11760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(buf) 11770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_size = compress_size 11780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 11790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_size = file_size 11800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.CRC = CRC 11810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.file_size = file_size 11820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not zip64 and self._allowZip64: 11830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if file_size > ZIP64_LIMIT: 11840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError('File size has increased during compressing') 11850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if compress_size > ZIP64_LIMIT: 11860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError('Compressed size larger than uncompressed size') 11870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Seek backwards and write file header (which will now include 11880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # correct CRC and file sizes) 11890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao position = self.fp.tell() # Preserve current position in file 11900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.seek(zinfo.header_offset, 0) 11910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(zinfo.FileHeader(zip64)) 11920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.seek(position, 0) 11930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filelist.append(zinfo) 11940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.NameToInfo[zinfo.filename] = zinfo 11950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def writestr(self, zinfo_or_arcname, bytes, compress_type=None): 11970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Write a file into the archive. The contents is the string 11980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or 11990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao the name of the file in the archive.""" 12000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not isinstance(zinfo_or_arcname, ZipInfo): 12010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo = ZipInfo(filename=zinfo_or_arcname, 12020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao date_time=time.localtime(time.time())[:6]) 12030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_type = self.compression 12050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.external_attr = 0600 << 16 12060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 12070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo = zinfo_or_arcname 12080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self.fp: 12100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError( 12110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "Attempt to write to ZIP archive that was already closed") 12120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if compress_type is not None: 12140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_type = compress_type 12150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.file_size = len(bytes) # Uncompressed size 12170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.header_offset = self.fp.tell() # Start of header bytes 12180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._writecheck(zinfo) 12190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._didModify = True 12200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum 12210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.compress_type == ZIP_DEFLATED: 12220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 12230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zlib.DEFLATED, -15) 12240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao bytes = co.compress(bytes) + co.flush() 12250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_size = len(bytes) # Compressed size 12260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 12270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_size = zinfo.file_size 12280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zip64 = zinfo.file_size > ZIP64_LIMIT or \ 12290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.compress_size > ZIP64_LIMIT 12300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zip64 and not self._allowZip64: 12310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise LargeZipFile("Filesize would require ZIP64 extensions") 12320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(zinfo.FileHeader(zip64)) 12330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(bytes) 12340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.flag_bits & 0x08: 12350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Write CRC and file sizes after the file data 12360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fmt = '<LQQ' if zip64 else '<LLL' 12370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, 12380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.file_size)) 12390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.flush() 12400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.filelist.append(zinfo) 12410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.NameToInfo[zinfo.filename] = zinfo 12420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __del__(self): 12440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Call the "close()" method in case the user forgot.""" 12450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.close() 12460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 12480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Close the file, and for mode "w" and "a" write the ending 12490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao records.""" 12500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.fp is None: 12510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 12520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 12540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.mode in ("w", "a") and self._didModify: # write ending records 12550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao count = 0 12560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos1 = self.fp.tell() 12570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for zinfo in self.filelist: # write central directory 12580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao count = count + 1 12590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dt = zinfo.date_time 12600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 12610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 12620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra = [] 12630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.file_size > ZIP64_LIMIT \ 12640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao or zinfo.compress_size > ZIP64_LIMIT: 12650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra.append(zinfo.file_size) 12660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra.append(zinfo.compress_size) 12670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_size = 0xffffffff 12680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compress_size = 0xffffffff 12690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 12700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_size = zinfo.file_size 12710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao compress_size = zinfo.compress_size 12720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if zinfo.header_offset > ZIP64_LIMIT: 12740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra.append(zinfo.header_offset) 12750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao header_offset = 0xffffffffL 12760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 12770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao header_offset = zinfo.header_offset 12780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra_data = zinfo.extra 12800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if extra: 12810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Append a ZIP64 field to the extra's 12820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extra_data = struct.pack( 12830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao '<HH' + 'Q'*len(extra), 12840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 1, 8*len(extra), *extra) + extra_data 12850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extract_version = max(45, zinfo.extract_version) 12870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao create_version = max(45, zinfo.create_version) 12880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 12890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao extract_version = zinfo.extract_version 12900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao create_version = zinfo.create_version 12910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 12930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filename, flag_bits = zinfo._encodeFilenameFlags() 12940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centdir = struct.pack(structCentralDir, 12950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stringCentralDir, create_version, 12960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.create_system, extract_version, zinfo.reserved, 12970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao flag_bits, zinfo.compress_type, dostime, dosdate, 12980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.CRC, compress_size, file_size, 12990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao len(filename), len(extra_data), len(zinfo.comment), 13000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 0, zinfo.internal_attr, zinfo.external_attr, 13010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao header_offset) 13020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except DeprecationWarning: 13030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print >>sys.stderr, (structCentralDir, 13040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stringCentralDir, create_version, 13050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.create_system, extract_version, zinfo.reserved, 13060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 13070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zinfo.CRC, compress_size, file_size, 13080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao len(zinfo.filename), len(extra_data), len(zinfo.comment), 13090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 0, zinfo.internal_attr, zinfo.external_attr, 13100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao header_offset) 13110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 13120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(centdir) 13130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(filename) 13140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(extra_data) 13150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(zinfo.comment) 13160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos2 = self.fp.tell() 13180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Write end-of-zip-archive record 13190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirCount = count 13200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirSize = pos2 - pos1 13210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirOffset = pos1 13220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if (centDirCount >= ZIP_FILECOUNT_LIMIT or 13230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirOffset > ZIP64_LIMIT or 13240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirSize > ZIP64_LIMIT): 13250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Need to write the ZIP64 end-of-archive records 13260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zip64endrec = struct.pack( 13270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao structEndArchive64, stringEndArchive64, 13280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 44, 45, 45, 0, 0, centDirCount, centDirCount, 13290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirSize, centDirOffset) 13300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(zip64endrec) 13310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zip64locrec = struct.pack( 13330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao structEndArchive64Locator, 13340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stringEndArchive64Locator, 0, pos2, 1) 13350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(zip64locrec) 13360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirCount = min(centDirCount, 0xFFFF) 13370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirSize = min(centDirSize, 0xFFFFFFFF) 13380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirOffset = min(centDirOffset, 0xFFFFFFFF) 13390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao endrec = struct.pack(structEndArchive, stringEndArchive, 13410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 0, 0, centDirCount, centDirCount, 13420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao centDirSize, centDirOffset, len(self._comment)) 13430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(endrec) 13440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.write(self._comment) 13450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp.flush() 13460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 13470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp = self.fp 13480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fp = None 13490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._filePassed: 13500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.close() 13510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13530a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass PyZipFile(ZipFile): 13540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Class to create ZIP archives with Python library files and packages.""" 13550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def writepy(self, pathname, basename = ""): 13570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Add all files from "pathname" to the ZIP archive. 13580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If pathname is a package directory, search the directory and 13600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao all package subdirectories recursively for all *.py and enter 13610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao the modules into the archive. If pathname is a plain 13620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directory, listdir *.py and enter all modules. Else, pathname 13630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao must be a Python *.py file and the module will be put into the 13640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao archive. Added modules are always module.pyo or module.pyc. 13650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao This method will compile the module.py into module.pyc if 13660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao necessary. 13670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 13680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dir, name = os.path.split(pathname) 13690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.isdir(pathname): 13700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao initname = os.path.join(pathname, "__init__.py") 13710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.isfile(initname): 13720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This is a package directory, add it 13730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if basename: 13740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao basename = "%s/%s" % (basename, name) 13750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 13760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao basename = name 13770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 13780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Adding package in", pathname, "as", basename 13790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname, arcname = self._get_codename(initname[0:-3], basename) 13800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 13810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Adding", arcname 13820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.write(fname, arcname) 13830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dirlist = os.listdir(pathname) 13840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dirlist.remove("__init__.py") 13850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Add all *.py files and package subdirectories 13860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for filename in dirlist: 13870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = os.path.join(pathname, filename) 13880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao root, ext = os.path.splitext(filename) 13890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.isdir(path): 13900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.isfile(os.path.join(path, "__init__.py")): 13910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This is a package directory, add it 13920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.writepy(path, basename) # Recursive call 13930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif ext == ".py": 13940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname, arcname = self._get_codename(path[0:-3], 13950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao basename) 13960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 13970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Adding", arcname 13980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.write(fname, arcname) 13990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 14000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This is NOT a package directory, add its files at top level 14010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 14020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Adding files from directory", pathname 14030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for filename in os.listdir(pathname): 14040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = os.path.join(pathname, filename) 14050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao root, ext = os.path.splitext(filename) 14060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if ext == ".py": 14070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname, arcname = self._get_codename(path[0:-3], 14080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao basename) 14090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 14100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Adding", arcname 14110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.write(fname, arcname) 14120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 14130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if pathname[-3:] != ".py": 14140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise RuntimeError, \ 14150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'Files added with writepy() must end with ".py"' 14160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname, arcname = self._get_codename(pathname[0:-3], basename) 14170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 14180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Adding file", arcname 14190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.write(fname, arcname) 14200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _get_codename(self, pathname, basename): 14220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return (filename, archivename) for the path. 14230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Given a module name path, return the correct file path and 14250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao archive name, compiling if necessary. For example, given 14260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao /python/lib/string, return (/python/lib/string.pyc, string). 14270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 14280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_py = pathname + ".py" 14290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_pyc = pathname + ".pyc" 14300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file_pyo = pathname + ".pyo" 14310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.isfile(file_pyo) and \ 14320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: 14330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname = file_pyo # Use .pyo file 14340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif not os.path.isfile(file_pyc) or \ 14350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: 14360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import py_compile 14370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.debug: 14380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Compiling", file_py 14390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 14400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao py_compile.compile(file_py, file_pyc, None, True) 14410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except py_compile.PyCompileError,err: 14420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print err.msg 14430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname = file_pyc 14440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 14450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fname = file_pyc 14460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao archivename = os.path.split(fname)[1] 14470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if basename: 14480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao archivename = "%s/%s" % (basename, archivename) 14490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return (fname, archivename) 14500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14520a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef main(args = None): 14530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import textwrap 14540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao USAGE=textwrap.dedent("""\ 14550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Usage: 14560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zipfile.py -l zipfile.zip # Show listing of a zipfile 14570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zipfile.py -t zipfile.zip # Test if a zipfile is valid 14580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zipfile.py -e zipfile.zip target # Extract zipfile into target dir 14590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zipfile.py -c zipfile.zip src ... # Create zipfile from sources 14600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """) 14610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if args is None: 14620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao args = sys.argv[1:] 14630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not args or args[0] not in ('-l', '-c', '-e', '-t'): 14650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print USAGE 14660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sys.exit(1) 14670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if args[0] == '-l': 14690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(args) != 2: 14700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print USAGE 14710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sys.exit(1) 14720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with ZipFile(args[1], 'r') as zf: 14730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zf.printdir() 14740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif args[0] == '-t': 14760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(args) != 2: 14770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print USAGE 14780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sys.exit(1) 14790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with ZipFile(args[1], 'r') as zf: 14800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao badfile = zf.testzip() 14810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if badfile: 14820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print("The following enclosed file is corrupted: {!r}".format(badfile)) 14830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "Done testing" 14840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif args[0] == '-e': 14860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(args) != 3: 14870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print USAGE 14880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sys.exit(1) 14890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with ZipFile(args[1], 'r') as zf: 14910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao out = args[2] 14920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for path in zf.namelist(): 14930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if path.startswith('./'): 14940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tgt = os.path.join(out, path[2:]) 14950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 14960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tgt = os.path.join(out, path) 14970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tgtdir = os.path.dirname(tgt) 14990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not os.path.exists(tgtdir): 15000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.makedirs(tgtdir) 15010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with open(tgt, 'wb') as fp: 15020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fp.write(zf.read(path)) 15030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif args[0] == '-c': 15050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(args) < 3: 15060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print USAGE 15070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sys.exit(1) 15080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def addToZip(zf, path, zippath): 15100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.isfile(path): 15110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao zf.write(path, zippath, ZIP_DEFLATED) 15120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif os.path.isdir(path): 15130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for nm in os.listdir(path): 15140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addToZip(zf, 15150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.path.join(path, nm), os.path.join(zippath, nm)) 15160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # else: ignore 15170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with ZipFile(args[1], 'w', allowZip64=True) as zf: 15190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for src in args[2:]: 15200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addToZip(zf, src, os.path.basename(src)) 15210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15220a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoif __name__ == "__main__": 15230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao main() 1524