#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

# Module metadata.  The "$...$" values are version-control keyword
# placeholders and are kept verbatim.
__version__ = "$Revision: 85213 $"
# $Source$

version     = "0.9.0"
# The a-umlaut is written as an escape so that the literal does not depend
# on the encoding the file happens to be stored or transferred in.
__author__  = "Lars Gust\xe4bel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gust\xe4bel, Richard Townsend."
410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------- 430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Imports 440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------- 450a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport sys 460a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport os 470a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport shutil 480a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport stat 490a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport errno 500a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport time 510a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport struct 520a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport copy 530a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport re 540a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport operator 550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 560a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry: 570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import grp, pwd 580a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept ImportError: 590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao grp = pwd = None 600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# from tarfile import * 620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] 630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------------------------------------------------------- 650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# tar constants 660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------------------------------------------------------- 670a8c90248264a8b26970b4473770bcc3df8515fJosh GaoNUL = "\0" # the null character 680a8c90248264a8b26970b4473770bcc3df8515fJosh GaoBLOCKSIZE = 512 # length of processing blocks 690a8c90248264a8b26970b4473770bcc3df8515fJosh GaoRECORDSIZE = BLOCKSIZE * 20 # length of records 
# The magic and version fields of a header occupy 8 bytes together, so both
# magic strings below are exactly 8 characters long.
GNU_MAGIC       = "ustar  \0"           # magic gnu tar string ("ustar", 2 spaces, NUL)
POSIX_MAGIC     = "ustar\x0000"         # magic posix tar string ("ustar", NUL, "00")

LENGTH_NAME     = 100                   # maximum length of a filename
LENGTH_LINK     = 100                   # maximum length of a linkname
LENGTH_PREFIX   = 155                   # maximum length of the prefix field

REGTYPE         = "0"                   # regular file
AREGTYPE        = "\0"                  # regular file
LNKTYPE         = "1"                   # link (inside tarfile)
SYMTYPE         = "2"                   # symbolic link
CHRTYPE         = "3"                   # character special device
BLKTYPE         = "4"                   # block special device
DIRTYPE         = "5"                   # directory
FIFOTYPE        = "6"                   # fifo special device
CONTTYPE        = "7"                   # contiguous file

GNUTYPE_LONGNAME = "L"                  # GNU tar longname
GNUTYPE_LONGLINK = "K"                  # GNU tar longlink
GNUTYPE_SPARSE   = "S"                  # GNU tar sparse file

XHDTYPE         = "x"                   # POSIX.1-2001 extended header
XGLTYPE         = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"                   # Solaris extended header

USTAR_FORMAT    = 0                     # POSIX.1-1988 (ustar) format
GNU_FORMAT      = 1                     # GNU tar format
PAX_FORMAT      = 2                     # POSIX.1-2001 (pax) format
DEFAULT_FORMAT  = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# Written with the "0o" prefix, which is accepted by Python 2.6+ and 3.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

TSUID   = 0o4000          # set UID on execution
TSGID   = 0o2000          # set GID on execution
TSVTX   = 0o1000          # reserved

TUREAD  = 0o400           # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC  = 0o100           # execute/search by owner
TGREAD  = 0o040           # read by group
TGWRITE = 0o020           # write by group
TGEXEC  = 0o010           # execute/search by group
TOREAD  = 0o004           # read by other
TOWRITE = 0o002           # write by other
TOEXEC  = 0o001           # execute/search by other

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Fall back to the interpreter default when the filesystem encoding
# is not reported.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()

#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

       The result is always exactly `length' characters long: longer
       input is truncated, shorter input is padded with NUL.
    """
    # A negative repeat count yields "", so over-long input is just cut.
    return s[:length] + (length - len(s)) * NUL

def nts(s):
    """Convert a null-terminated string field to a python string.

       Returns everything before the first null char, or the whole
       string if it contains none.
    """
    p = s.find("\0")
    if p == -1:
        return s
    return s[:p]

def nti(s):
    """Convert a number field to a python number.

       Accepts both encodings produced by itn(): a string of octal
       digits, or the GNU base-256 form introduced by a 0200 byte
       (positive) or a 0377 byte (negative, two's complement).
       An empty field is read as 0. Raises InvalidHeaderError if an
       octal field cannot be parsed.
    """
    # Slice instead of index so that an empty field does not raise.
    marker = s[:1]
    if marker not in (chr(0o200), chr(0o377)):
        try:
            return int(nts(s) or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: the bytes after the marker are a big-endian number.
    n = 0
    for ch in s[1:]:
        n = (n << 8) + ord(ch)
    if marker == chr(0o377):
        # Undo the two's complement applied by itn() for negative values.
        n -= 256 ** (len(s) - 1)
    return n

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of `digits' characters.

       Raises ValueError("overflow in number field") if the number does
       not fit the field in the requested format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    # Negative numbers use a leading 0377 byte followed by the low
    # digits-1 bytes of the two's complement representation.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    limit = 256 ** (digits - 1)
    if format != GNU_FORMAT or not (-limit <= n < limit):
        raise ValueError("overflow in number field")

    if n >= 0:
        marker = chr(0o200)
    else:
        marker = chr(0o377)
        # Portable two's complement; no dependency on the platform's
        # native long size.
        n += 256 ** digits

    payload = []
    for _ in range(digits - 1):
        payload.append(chr(n & 0o377))
        n >>= 8
    payload.reverse()
    return marker + "".join(payload)

def uts(s, encoding, errors):
    """Convert a unicode object to a string.

       `errors' is a codec error handler name, or the special value
       "utf-8" (see below).
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001. Replace untranslatable characters with their
    # UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pieces = []
        for c in s:
            try:
                pieces.append(c.encode(encoding, "strict"))
            except UnicodeEncodeError:
                pieces.append(c.encode("utf8"))
        # encode() returns byte strings, so join them with a byte string.
        return b"".join(pieces)

def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # "8x" skips the chksum field at offsets 148..155; the constant 256
    # is the sum of the eight spaces (8 * 0x20) that stand in for it.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError("end of file reached") if src runs out of data
       before length bytes were copied. A length of zero or less
       copies nothing.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    remaining = length
    while remaining > 0:
        want = min(BUFSIZE, remaining)
        buf = src.read(want)
        if len(buf) < want:
            raise IOError("end of file reached")
        dst.write(buf)
        remaining -= want
    return

# One inner tuple per output character of filemode(). Within a tuple the
# first (bits, char) pair whose bits are all set in the mode wins, so
# combined entries such as TUEXEC|TSUID must come before their parts.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for table in filemode_table:
        for bit, char in table:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            # No entry of this position matched.
            perm.append("-")
    return "".join(perm)

class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
Gao#--------------------------- 3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _LowLevelFile: 3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Low-level file object. Supports reading and writing. 3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao It is used instead of a regular file object for streaming 3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao access. 3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, name, mode): 3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode = { 3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "r": os.O_RDONLY, 3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao }[mode] 3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(os, "O_BINARY"): 3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode |= os.O_BINARY 3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fd = os.open(name, mode, 0666) 3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.close(self.fd) 3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def read(self, size): 3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return os.read(self.fd, size) 3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def write(self, s): 3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.write(self.fd, s) 3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _Stream: 3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Class that serves as an adapter between TarFile and 3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao a stream-like object. 
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened with _LowLevelFile when
                       fileobj is None.
           mode     -- "r" or "w".
           comptype -- "tar", "gz", "bz2", or "*" to detect the
                       compression from the first block read.
           fileobj  -- object with read()/write(), or None.
           bufsize  -- block size used for all raw reads and writes.

           Raises CompressionError when the required compression
           module cannot be imported.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"") & 0xffffffff
                # Exception type raised by the decompressor on bad data.
                self.exception = zlib.error
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                self.exception = IOError
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except BaseException:
            # Do not leak a file that was opened here, and keep
            # __del__ from calling close() on a half-built object.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        # magic, deflate method, FNAME flag, mtime, xfl, os
        self.__write(b"\037\213\010\010%s\002\377" % timestamp)
        if isinstance(self.name, unicode):
            self.name = self.name.encode("iso-8859-1", "replace")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffff
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # The crc is masked so that it is packed as an
                    # unsigned 32-bit value on every platform.
                    self.fileobj.write(struct.pack("<L",
                                                   self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L",
                                                   self.pos & 0xffffFFFF))
        finally:
            # Release a privately opened file even when flushing fails.
            self.closed = True
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Consumes the gzip member header so that the next raw read
           starts at the deflate data.  Raises ReadError when the magic
           number is wrong and CompressionError for a method other
           than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # FEXTRA: the extra field is raw header data, so it has to
            # be skipped with the raw reader, not the decompressing one.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: two byte header checksum
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Returns the new position; raises StreamError when pos lies
           before the current position.
        """
        if pos - self.pos < 0:
            raise StreamError("seeking backwards is not allowed")

        blocks, remainder = divmod(pos - self.pos, self.bufsize)
        while blocks > 0:
            self.read(self.bufsize)
            blocks -= 1
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           For compressed streams raw blocks are decompressed until
           size bytes are available or the input is exhausted; surplus
           data is kept in self.dbuf.  Raises ReadError when the
           decompressor rejects the data.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
class _StreamProxy(object):
    """Tiny wrapper used for mode 'r|*': it reads the first block of
       the stream up front so that the compression type can be
       guessed from it, and hands that block out again on the first
       read() call.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        # The first block is kept for getcomptype() and the first read().
        self.buf = fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read in advance. Afterwards
           read() is the wrapped object's own read method.
        """
        underlying_read = self.fileobj.read
        self.read = underlying_read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes
           at the start of the first block.
        """
        head = self.buf
        if head.startswith("\037\213\010"):
            return "gz"
        looks_like_bz2 = head[0:3] == "BZh" and head[4:10] == "1AY&SY"
        if looks_like_bz2:
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class StreamProxy
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes requested from fileobj per read.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing (any
           other mode) bzip2 compressed data.
        """
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start at position 0 with a fresh (de)compressor. In
           read mode fileobj is rewound to its beginning.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = b""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return at most size bytes of decompressed data; fewer are
           returned only when the compressed stream is exhausted.
        """
        chunks = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # The bz2 stream has already ended; whatever follows
                # in the file is not part of it.
                break
            chunks.append(data)
            available += len(data)
        self.buf = b"".join(chunks)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to the decompressed offset pos. Going backwards
           restarts decompression from the beginning of the file.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current offset in the decompressed data.
        """
        return self.pos

    def write(self, data):
        """Compress data and write the result to fileobj.
        """
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """In write mode, flush the compressor to fileobj. The
           wrapped file object itself is not closed here.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
# class _BZ2Proxy

#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj -- the file object the data is taken from.
           offset  -- position in fileobj where the data starts.
           size    -- length of the data.
           sparse  -- optional object whose find(position) method
                      returns the section covering position, or None.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.

           At most size bytes are returned, and never more than what
           is left between the current position and the end of the
           data. A size of None or a negative size means everything
           that is left.
        """
        # Never hand a negative count to the underlying file object:
        # it would read on to the end of the whole file and move the
        # position backwards.
        remaining = max(self.size - self.position, 0)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = []
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)

        if section is None:
            return ""

        # Do not read beyond the end of the current section.
        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            # Data sections are stored in the archive at realpos.
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            # Any other section is returned as a run of NUL bytes.
            self.position += size
            return NUL * size
#class _FileInFile
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes requested per read while looking for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by tarinfo,
           taking its data from tarfile.fileobj.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data already fetched by readline() but not yet returned.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "no limit"; it must not be used as a
        # slice index or passed down as a byte count.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. A negative size
           or None means no limit.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           The resulting position is clamped to the range from 0 to
           the size of the member. Raises ValueError if the file is
           closed or whence is not one of os.SEEK_SET, os.SEEK_CUR
           and os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered read-ahead data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
Gao 8940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 8950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Close the file object. 8960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 8970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closed = True 8980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 8990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __iter__(self): 9000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Get an iterator over the file's lines. 9010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 9030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao line = self.readline() 9040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not line: 9050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 9060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao yield line 9070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#class ExFileObject 9080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#------------------ 9100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Exported Classes 9110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#------------------ 9120a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass TarInfo(object): 9130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Informational class which holds the details about an 9140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao archive member given by a tar header block. 9150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TarInfo objects are returned by TarFile.getmember(), 9160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TarFile.getmembers() and TarFile.gettarinfo() and are 9170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao usually created internally. 
9180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, name=""): 9210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Construct a TarInfo object. name is the optional name 9220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao of the member. 9230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.name = name # member name 9250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.mode = 0644 # file permissions 9260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.uid = 0 # user id 9270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.gid = 0 # group id 9280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.size = 0 # file size 9290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.mtime = 0 # modification time 9300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.chksum = 0 # header checksum 9310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.type = REGTYPE # member type 9320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.linkname = "" # link name 9330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.uname = "" # user name 9340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.gname = "" # group name 9350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.devmajor = 0 # device major number 9360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.devminor = 0 # device minor number 9370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset = 0 # the tar header starts here 9390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset_data = 0 # the file's data starts here 9400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.pax_headers = {} # pax header information 9420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # In pax 
headers the "name" and "linkname" field are called 9440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "path" and "linkpath". 9450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _getpath(self): 9460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.name 9470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _setpath(self, name): 9480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.name = name 9490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao path = property(_getpath, _setpath) 9500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _getlinkpath(self): 9520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.linkname 9530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _setlinkpath(self, linkname): 9540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.linkname = linkname 9550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao linkpath = property(_getlinkpath, _setlinkpath) 9560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __repr__(self): 9580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) 9590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def get_info(self, encoding, errors): 9610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the TarInfo's attributes as a dictionary. 
9620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = { 9640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "name": self.name, 9650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "mode": self.mode & 07777, 9660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "uid": self.uid, 9670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "gid": self.gid, 9680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "size": self.size, 9690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "mtime": self.mtime, 9700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "chksum": self.chksum, 9710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "type": self.type, 9720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "linkname": self.linkname, 9730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "uname": self.uname, 9740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "gname": self.gname, 9750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "devmajor": self.devmajor, 9760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "devminor": self.devminor 9770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao } 9780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if info["type"] == DIRTYPE and not info["name"].endswith("/"): 9800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["name"] += "/" 9810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for key in ("name", "linkname", "uname", "gname"): 9830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if type(info[key]) is unicode: 9840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info[key] = info[key].encode(encoding, errors) 9850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return info 9870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): 
9890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a tar header as a string of 512 byte blocks. 9900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 9910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = self.get_info(encoding, errors) 9920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 9930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if format == USTAR_FORMAT: 9940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.create_ustar_header(info) 9950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif format == GNU_FORMAT: 9960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.create_gnu_header(info) 9970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif format == PAX_FORMAT: 9980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.create_pax_header(info, encoding, errors) 9990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 10000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("invalid format") 10010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def create_ustar_header(self, info): 10030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the object as a ustar header block. 
10040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["magic"] = POSIX_MAGIC 10060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(info["linkname"]) > LENGTH_LINK: 10080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("linkname is too long") 10090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(info["name"]) > LENGTH_NAME: 10110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["prefix"], info["name"] = self._posix_split_name(info["name"]) 10120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._create_header(info, USTAR_FORMAT) 10140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def create_gnu_header(self, info): 10160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the object as a GNU header block sequence. 
10170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["magic"] = GNU_MAGIC 10190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = "" 10210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(info["linkname"]) > LENGTH_LINK: 10220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) 10230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(info["name"]) > LENGTH_NAME: 10250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) 10260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return buf + self._create_header(info, GNU_FORMAT) 10280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def create_pax_header(self, info, encoding, errors): 10300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the object as a ustar header block. If it cannot be 10310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao represented this way, prepend a pax extended header sequence 10320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with supplement information. 10330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["magic"] = POSIX_MAGIC 10350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax_headers = self.pax_headers.copy() 10360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Test string fields for values that exceed the field length or cannot 10380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # be represented in ASCII encoding. 
10390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for name, hname, length in ( 10400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), 10410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ("uname", "uname", 32), ("gname", "gname", 32)): 10420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hname in pax_headers: 10440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # The pax header has priority. 10450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 10460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao val = info[name].decode(encoding, errors) 10480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Try to encode the string as ASCII. 10500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 10510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao val.encode("ascii") 10520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except UnicodeEncodeError: 10530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax_headers[hname] = val 10540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 10550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(info[name]) > length: 10570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax_headers[hname] = val 10580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Test number fields for values that exceed the field limit or values 10600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # that like to be stored as float. 10610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): 10620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if name in pax_headers: 10630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # The pax header has priority. Avoid overflow. 
10640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info[name] = 0 10650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 10660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao val = info[name] 10680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): 10690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax_headers[name] = unicode(val) 10700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info[name] = 0 10710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create a pax extended header if necessary. 10730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if pax_headers: 10740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = self._create_pax_generic_header(pax_headers) 10750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 10760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = "" 10770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return buf + self._create_header(info, USTAR_FORMAT) 10790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 10810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def create_pax_global_header(cls, pax_headers): 10820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the object as a pax global header block sequence. 10830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) 10850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _posix_split_name(self, name): 10870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Split a name longer than 100 chars into a prefix 10880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao and a name part. 
10890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 10900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao prefix = name[:LENGTH_PREFIX + 1] 10910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while prefix and prefix[-1] != "/": 10920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao prefix = prefix[:-1] 10930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = name[len(prefix):] 10950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao prefix = prefix[:-1] 10960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 10970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not prefix or len(name) > LENGTH_NAME: 10980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("name is too long") 10990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return prefix, name 11000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @staticmethod 11020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _create_header(info, format): 11030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a header block. info is a dictionary with file 11040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao information, format must be one of the *_FORMAT constants. 
11050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 11060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao parts = [ 11070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stn(info.get("name", ""), 100), 11080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao itn(info.get("mode", 0) & 07777, 8, format), 11090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao itn(info.get("uid", 0), 8, format), 11100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao itn(info.get("gid", 0), 8, format), 11110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao itn(info.get("size", 0), 12, format), 11120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao itn(info.get("mtime", 0), 12, format), 11130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao " ", # checksum field 11140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info.get("type", REGTYPE), 11150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stn(info.get("linkname", ""), 100), 11160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stn(info.get("magic", POSIX_MAGIC), 8), 11170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stn(info.get("uname", ""), 32), 11180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stn(info.get("gname", ""), 32), 11190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao itn(info.get("devmajor", 0), 8, format), 11200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao itn(info.get("devminor", 0), 8, format), 11210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stn(info.get("prefix", ""), 155) 11220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ] 11230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) 11250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao chksum = calc_chksums(buf[-BLOCKSIZE:])[0] 11260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] 11270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return buf 11280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11290a8c90248264a8b26970b4473770bcc3df8515fJosh 
Gao @staticmethod 11300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _create_payload(payload): 11310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the string payload filled with zero bytes 11320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao up to the next 512 byte border. 11330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 11340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocks, remainder = divmod(len(payload), BLOCKSIZE) 11350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if remainder > 0: 11360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao payload += (BLOCKSIZE - remainder) * NUL 11370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return payload 11380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 11400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _create_gnu_long_header(cls, name, type): 11410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence 11420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for name. 11430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 11440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name += NUL 11450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = {} 11470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["name"] = "././@LongLink" 11480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["type"] = type 11490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["size"] = len(name) 11500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["magic"] = GNU_MAGIC 11510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # create extended header + name blocks. 
11530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return cls._create_header(info, USTAR_FORMAT) + \ 11540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cls._create_payload(name) 11550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 11570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): 11580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a POSIX.1-2001 extended or global header sequence 11590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao that contains a list of keyword, value pairs. The values 11600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao must be unicode objects. 11610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 11620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao records = [] 11630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for keyword, value in pax_headers.iteritems(): 11640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao keyword = keyword.encode("utf8") 11650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = value.encode("utf8") 11660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' 11670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao n = p = 0 11680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 11690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao n = l + len(str(p)) 11700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if n == p: 11710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 11720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao p = n 11730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao records.append("%d %s=%s\n" % (p, keyword, value)) 11740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao records = "".join(records) 11750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # We use a hardcoded "././@PaxHeader" name like star does 11770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # instead of the one 
that POSIX recommends. 11780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info = {} 11790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["name"] = "././@PaxHeader" 11800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["type"] = type 11810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["size"] = len(records) 11820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao info["magic"] = POSIX_MAGIC 11830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create pax header + record blocks. 11850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return cls._create_header(info, USTAR_FORMAT) + \ 11860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao cls._create_payload(records) 11870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 11890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def frombuf(cls, buf): 11900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Construct a TarInfo object from a 512 byte string buffer. 
11910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 11920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(buf) == 0: 11930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise EmptyHeaderError("empty header") 11940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(buf) != BLOCKSIZE: 11950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise TruncatedHeaderError("truncated header") 11960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if buf.count(NUL) == BLOCKSIZE: 11970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise EOFHeaderError("end of file header") 11980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 11990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao chksum = nti(buf[148:156]) 12000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if chksum not in calc_chksums(buf): 12010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise InvalidHeaderError("bad checksum") 12020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj = cls() 12040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.buf = buf 12050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.name = nts(buf[0:100]) 12060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.mode = nti(buf[100:108]) 12070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.uid = nti(buf[108:116]) 12080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.gid = nti(buf[116:124]) 12090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.size = nti(buf[124:136]) 12100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.mtime = nti(buf[136:148]) 12110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.chksum = chksum 12120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.type = buf[156:157] 12130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.linkname = nts(buf[157:257]) 12140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.uname = nts(buf[265:297]) 12150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.gname = nts(buf[297:329]) 12160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
obj.devmajor = nti(buf[329:337]) 12170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.devminor = nti(buf[337:345]) 12180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao prefix = nts(buf[345:500]) 12190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Old V7 tar format represents a directory as a regular 12210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # file with a trailing slash. 12220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if obj.type == AREGTYPE and obj.name.endswith("/"): 12230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.type = DIRTYPE 12240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Remove redundant slashes from directories. 12260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if obj.isdir(): 12270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.name = obj.name.rstrip("/") 12280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Reconstruct a ustar longname. 12300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if prefix and obj.type not in GNU_TYPES: 12310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.name = prefix + "/" + obj.name 12320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return obj 12330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 12350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def fromtarfile(cls, tarfile): 12360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the next TarInfo object from TarFile object 12370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarfile. 
12380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 12390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = tarfile.fileobj.read(BLOCKSIZE) 12400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj = cls.frombuf(buf) 12410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao obj.offset = tarfile.fileobj.tell() - BLOCKSIZE 12420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return obj._proc_member(tarfile) 12430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #-------------------------------------------------------------------------- 12450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # The following are methods that are called depending on the type of a 12460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # member. The entry point is _proc_member() which can be overridden in a 12470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # subclass to add custom _proc_*() methods. A _proc_*() method MUST 12480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # implement the following 12490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # operations: 12500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 1. Set self.offset_data to the position where the data blocks begin, 12510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # if there is data that follows. 12520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 2. Set tarfile.offset to the position where the next member's header will 12530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # begin. 12540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 3. Return self or another valid TarInfo object. 12550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _proc_member(self, tarfile): 12560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Choose the right processing method depending on 12570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao the type and call it. 
12580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 12590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): 12600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._proc_gnulong(tarfile) 12610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif self.type == GNUTYPE_SPARSE: 12620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._proc_sparse(tarfile) 12630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): 12640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._proc_pax(tarfile) 12650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 12660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self._proc_builtin(tarfile) 12670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _proc_builtin(self, tarfile): 12690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Process a builtin type or an unknown type which 12700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao will be treated as a regular file. 12710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 12720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset_data = tarfile.fileobj.tell() 12730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao offset = self.offset_data 12740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.isreg() or self.type not in SUPPORTED_TYPES: 12750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Skip the following data blocks. 12760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao offset += self._block(self.size) 12770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarfile.offset = offset 12780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Patch the TarInfo object with saved global 12800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # header information. 
12810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) 12820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self 12840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _proc_gnulong(self, tarfile): 12860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Process the blocks that hold a GNU longname 12870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao or longlink member. 12880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 12890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = tarfile.fileobj.read(self._block(self.size)) 12900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Fetch the next header and process it. 12920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 12930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao next = self.fromtarfile(tarfile) 12940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except HeaderError: 12950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise SubsequentHeaderError("missing or bad subsequent header") 12960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 12970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Patch the TarInfo object from the next header with 12980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # the longname information. 
12990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao next.offset = self.offset 13000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.type == GNUTYPE_LONGNAME: 13010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao next.name = nts(buf) 13020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif self.type == GNUTYPE_LONGLINK: 13030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao next.linkname = nts(buf) 13040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return next 13060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _proc_sparse(self, tarfile): 13080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Process a GNU sparse header plus extra headers. 13090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 13100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = self.buf 13110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sp = _ringbuffer() 13120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos = 386 13130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao lastpos = 0L 13140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realpos = 0L 13150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # There are 4 possible sparse structs in the 13160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # first header. 
13170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for i in xrange(4): 13180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 13190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao offset = nti(buf[pos:pos + 12]) 13200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao numbytes = nti(buf[pos + 12:pos + 24]) 13210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ValueError: 13220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 13230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if offset > lastpos: 13240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sp.append(_hole(lastpos, offset - lastpos)) 13250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sp.append(_data(offset, numbytes, realpos)) 13260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realpos += numbytes 13270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao lastpos = offset + numbytes 13280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos += 24 13290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao isextended = ord(buf[482]) 13310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao origsize = nti(buf[483:495]) 13320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If the isextended flag is given, 13340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # there are extra headers to process. 
13350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while isextended == 1: 13360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = tarfile.fileobj.read(BLOCKSIZE) 13370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos = 0 13380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for i in xrange(21): 13390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 13400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao offset = nti(buf[pos:pos + 12]) 13410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao numbytes = nti(buf[pos + 12:pos + 24]) 13420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ValueError: 13430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 13440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if offset > lastpos: 13450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sp.append(_hole(lastpos, offset - lastpos)) 13460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sp.append(_data(offset, numbytes, realpos)) 13470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao realpos += numbytes 13480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao lastpos = offset + numbytes 13490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos += 24 13500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao isextended = ord(buf[504]) 13510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if lastpos < origsize: 13530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao sp.append(_hole(lastpos, origsize - lastpos)) 13540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.sparse = sp 13560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset_data = tarfile.fileobj.tell() 13580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarfile.offset = self.offset_data + self._block(self.size) 13590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.size = origsize 13600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
return self 13620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _proc_pax(self, tarfile): 13640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Process an extended or global header as described in 13650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao POSIX.1-2001. 13660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 13670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Read the header information. 13680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = tarfile.fileobj.read(self._block(self.size)) 13690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # A pax header stores supplemental information for either 13710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # the following file (extended) or all following files 13720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # (global). 13730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.type == XGLTYPE: 13740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax_headers = tarfile.pax_headers 13750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 13760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax_headers = tarfile.pax_headers.copy() 13770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Parse pax header information. A record looks like that: 13790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # "%d %s=%s\n" % (length, keyword, value). length is the size 13800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # of the complete record including the length field itself and 13810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # the newline. keyword and value are both UTF-8 encoded strings. 
13820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao regex = re.compile(r"(\d+) ([^=]+)=", re.U) 13830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos = 0 13840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 13850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao match = regex.match(buf, pos) 13860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not match: 13870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 13880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao length, keyword = match.groups() 13900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao length = int(length) 13910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = buf[match.end(2) + 1:match.start(1) + length - 1] 13920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao keyword = keyword.decode("utf8") 13940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = value.decode("utf8") 13950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax_headers[keyword] = value 13970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pos += length 13980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 13990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Fetch the next header. 14000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 14010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao next = self.fromtarfile(tarfile) 14020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except HeaderError: 14030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise SubsequentHeaderError("missing or bad subsequent header") 14040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.type in (XHDTYPE, SOLARIS_XHDTYPE): 14060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Patch the TarInfo object with the extended header info. 
14070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) 14080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao next.offset = self.offset 14090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if "size" in pax_headers: 14110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If the extended header replaces the size field, 14120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # we need to recalculate the offset where the next 14130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # header starts. 14140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao offset = next.offset_data 14150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if next.isreg() or next.type not in SUPPORTED_TYPES: 14160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao offset += next._block(next.size) 14170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarfile.offset = offset 14180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return next 14200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _apply_pax_info(self, pax_headers, encoding, errors): 14220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Replace fields with supplemental information from a previous 14230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pax extended or global header. 
14240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 14250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for keyword, value in pax_headers.iteritems(): 14260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if keyword not in PAX_FIELDS: 14270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 14280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if keyword == "path": 14300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = value.rstrip("/") 14310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if keyword in PAX_NUMBER_FIELDS: 14330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 14340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = PAX_NUMBER_FIELDS[keyword](value) 14350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ValueError: 14360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = 0 14370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 14380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao value = uts(value, encoding, errors) 14390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao setattr(self, keyword, value) 14410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.pax_headers = pax_headers.copy() 14430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _block(self, count): 14450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Round up a byte count by BLOCKSIZE and return it, 14460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao e.g. _block(834) => 1024. 
14470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 14480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocks, remainder = divmod(count, BLOCKSIZE) 14490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if remainder: 14500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocks += 1 14510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return blocks * BLOCKSIZE 14520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isreg(self): 14540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type in REGULAR_TYPES 14550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isfile(self): 14560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.isreg() 14570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isdir(self): 14580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type == DIRTYPE 14590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def issym(self): 14600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type == SYMTYPE 14610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def islnk(self): 14620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type == LNKTYPE 14630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def ischr(self): 14640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type == CHRTYPE 14650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isblk(self): 14660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type == BLKTYPE 14670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isfifo(self): 14680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type == FIFOTYPE 14690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def issparse(self): 14700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type == GNUTYPE_SPARSE 14710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def isdev(self): 14720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) 14730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# class 
TarInfo 14740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14750a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass TarFile(object): 14760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """The TarFile Class provides an interface to tar archives. 14770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 14780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) 14800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dereference = False # If true, add content of linked file to the 14820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # tar file, else the link. 14830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ignore_zeros = False # If true, skips empty or invalid blocks and 14850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # continues processing. 14860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao errorlevel = 1 # If 0, fatal errors only appear in debug 14880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # messages (if debug >= 0). If > 0, errors 14890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # are passed to the caller as exceptions. 14900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao format = DEFAULT_FORMAT # The format to use when creating an archive. 14920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao encoding = ENCODING # Encoding for 8-bit character strings. 14940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao errors = None # Error handler for unicode conversion. 14960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = TarInfo # The default TarInfo class to use. 
14980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 14990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fileobject = ExFileObject # The default ExFileObject class to use. 15000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, name=None, mode="r", fileobj=None, format=None, 15020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, 15030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao errors=None, pax_headers=None, debug=None, errorlevel=None): 15040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to 15050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao read from an existing archive, 'a' to append data to an existing 15060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file or 'w' to create a new file overwriting an existing one. `mode' 15070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao defaults to 'r'. 15080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If `fileobj' is given, it is used for reading or writing data. If it 15090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao can be determined, `mode' is overridden by `fileobj's mode. 15100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao `fileobj' is not closed, when TarFile is closed. 
15110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 15120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(mode) > 1 or mode not in "raw": 15130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("mode must be 'r', 'a' or 'w'") 15140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.mode = mode 15150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] 15160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not fileobj: 15180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.mode == "a" and not os.path.exists(name): 15190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create nonexistent files in append mode. 15200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.mode = "w" 15210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._mode = "wb" 15220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fileobj = bltn_open(name, self._mode) 15230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._extfileobj = False 15240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 15250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if name is None and hasattr(fileobj, "name"): 15260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = fileobj.name 15270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(fileobj, "mode"): 15280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._mode = fileobj.mode 15290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._extfileobj = True 15300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.name = os.path.abspath(name) if name else None 15310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj = fileobj 15320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Init attributes. 
15340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if format is not None: 15350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.format = format 15360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo is not None: 15370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarinfo = tarinfo 15380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if dereference is not None: 15390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.dereference = dereference 15400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if ignore_zeros is not None: 15410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.ignore_zeros = ignore_zeros 15420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if encoding is not None: 15430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.encoding = encoding 15440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if errors is not None: 15460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.errors = errors 15470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif mode == "r": 15480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.errors = "utf-8" 15490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 15500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.errors = "strict" 15510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if pax_headers is not None and self.format == PAX_FORMAT: 15530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.pax_headers = pax_headers 15540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 15550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.pax_headers = {} 15560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if debug is not None: 15580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.debug = debug 15590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if errorlevel is not None: 15600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.errorlevel = errorlevel 
15610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Init datastructures. 15630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closed = False 15640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.members = [] # list of members as TarInfo objects 15650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._loaded = False # flag if all members have been read 15660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset = self.fileobj.tell() 15670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # current position in the archive file 15680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.inodes = {} # dictionary caching the inodes of 15690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # archive members already added 15700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 15720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.mode == "r": 15730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.firstmember = None 15740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.firstmember = self.next() 15750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.mode == "a": 15770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Move to the end of the archive, 15780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # before the first empty block. 
15790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 15800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.seek(self.offset) 15810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 15820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.tarinfo.fromtarfile(self) 15830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.members.append(tarinfo) 15840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EOFHeaderError: 15850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.seek(self.offset) 15860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 15870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except HeaderError, e: 15880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError(str(e)) 15890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.mode in "aw": 15910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._loaded = True 15920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 15930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.pax_headers: 15940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) 15950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.write(buf) 15960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset += len(buf) 15970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except: 15980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._extfileobj: 15990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.close() 16000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closed = True 16010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 16020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _getposix(self): 16040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.format == USTAR_FORMAT 16050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _setposix(self, value): 
16060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import warnings 16070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn("use the format attribute instead", DeprecationWarning, 16080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 2) 16090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if value: 16100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.format = USTAR_FORMAT 16110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 16120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.format = GNU_FORMAT 16130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao posix = property(_getposix, _setposix) 16140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #-------------------------------------------------------------------------- 16160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Below are the classmethods which act as alternate constructors to the 16170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # TarFile class. The open() method is the only one that is needed for 16180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # public use; it is the "super"-constructor and is able to select an 16190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # adequate "sub"-constructor for a particular compression using the mapping 16200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # from OPEN_METH. 16210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # 16220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # This concept allows one to subclass TarFile without losing the comfort of 16230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # the super-constructor. A sub-constructor is registered and made available 16240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # by adding it to the mapping in OPEN_METH. 
16250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 16270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): 16280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Open a tar archive for reading, writing or appending. Return 16290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao an appropriate TarFile class. 16300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode: 16320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r' or 'r:*' open for reading with transparent compression 16330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r:' open for reading exclusively uncompressed 16340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r:gz' open for reading with gzip compression 16350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r:bz2' open for reading with bzip2 compression 16360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'a' or 'a:' open for appending, creating the file if necessary 16370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'w' or 'w:' open for writing without compression 16380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'w:gz' open for writing with gzip compression 16390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'w:bz2' open for writing with bzip2 compression 16400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r|*' open a stream of tar blocks with transparent compression 16420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r|' open an uncompressed stream of tar blocks for reading 16430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r|gz' open a gzip compressed stream of tar blocks 16440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'r|bz2' open a bzip2 compressed stream of tar blocks 16450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'w|' open an uncompressed stream for writing 
16460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'w|gz' open a gzip compressed stream for writing 16470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'w|bz2' open a bzip2 compressed stream for writing 16480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 16490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not name and not fileobj: 16510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("nothing to open") 16520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mode in ("r", "r:*"): 16540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Find out which *open() is appropriate for opening the file. 16550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for comptype in cls.OPEN_METH: 16560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao func = getattr(cls, cls.OPEN_METH[comptype]) 16570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fileobj is not None: 16580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao saved_pos = fileobj.tell() 16590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 16600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return func(name, "r", fileobj, **kwargs) 16610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except (ReadError, CompressionError), e: 16620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fileobj is not None: 16630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fileobj.seek(saved_pos) 16640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 16650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError("file could not be opened successfully") 16660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif ":" in mode: 16680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filemode, comptype = mode.split(":", 1) 16690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filemode = filemode or "r" 16700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao comptype = comptype or "tar" 
16710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Select the *open() function according to 16730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # given compression. 16740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if comptype in cls.OPEN_METH: 16750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao func = getattr(cls, cls.OPEN_METH[comptype]) 16760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 16770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise CompressionError("unknown compression type %r" % comptype) 16780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return func(name, filemode, fileobj, **kwargs) 16790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif "|" in mode: 16810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filemode, comptype = mode.split("|", 1) 16820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao filemode = filemode or "r" 16830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao comptype = comptype or "tar" 16840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if filemode not in "rw": 16860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("mode must be 'r' or 'w'") 16870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t = cls(name, filemode, 16890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _Stream(name, filemode, comptype, fileobj, bufsize), 16900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao **kwargs) 16910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t._extfileobj = False 16920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return t 16930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif mode in "aw": 16950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return cls.taropen(name, mode, fileobj, **kwargs) 16960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
16970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("undiscernible mode") 16980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 16990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 17000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def taropen(cls, name, mode="r", fileobj=None, **kwargs): 17010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Open uncompressed tar archive name for reading or writing. 17020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 17030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(mode) > 1 or mode not in "raw": 17040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("mode must be 'r', 'a' or 'w'") 17050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return cls(name, mode, fileobj, **kwargs) 17060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 17080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 17090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Open gzip compressed tar archive name for reading or writing. 17100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Appending is not allowed. 
17110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 17120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(mode) > 1 or mode not in "rw": 17130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("mode must be 'r' or 'w'") 17140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 17160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import gzip 17170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao gzip.GzipFile 17180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except (ImportError, AttributeError): 17190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise CompressionError("gzip module is not available") 17200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fileobj is None: 17220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fileobj = bltn_open(name, mode + "b") 17230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 17250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t = cls.taropen(name, mode, 17260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao gzip.GzipFile(name, mode, compresslevel, fileobj), 17270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao **kwargs) 17280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except IOError: 17290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError("not a gzip file") 17300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t._extfileobj = False 17310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return t 17320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao @classmethod 17340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 17350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Open bzip2 compressed tar archive name for reading or writing. 17360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Appending is not allowed. 
17370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 17380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if len(mode) > 1 or mode not in "rw": 17390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("mode must be 'r' or 'w'.") 17400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 17420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import bz2 17430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 17440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise CompressionError("bz2 module is not available") 17450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fileobj is not None: 17470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fileobj = _BZ2Proxy(fileobj, mode) 17480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 17490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) 17500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 17520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t = cls.taropen(name, mode, fileobj, **kwargs) 17530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except (IOError, EOFError): 17540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError("not a bzip2 file") 17550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao t._extfileobj = False 17560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return t 17570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # All *open() methods are registered here. 
17590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao OPEN_METH = { 17600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "tar": "taropen", # uncompressed tar 17610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "gz": "gzopen", # gzip compressed tar 17620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "bz2": "bz2open" # bzip2 compressed tar 17630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao } 17640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #-------------------------------------------------------------------------- 17660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # The public methods which TarFile provides: 17670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 17690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Close the TarFile. In write-mode, two finishing zero blocks are 17700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao appended to the archive. 17710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 17720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.closed: 17730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 17740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.mode in "aw": 17760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.write(NUL * (BLOCKSIZE * 2)) 17770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset += (BLOCKSIZE * 2) 17780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # fill up the end with zero-blocks 17790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # (like option -b20 for tar does) 17800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocks, remainder = divmod(self.offset, RECORDSIZE) 17810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if remainder > 0: 17820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.write(NUL * (RECORDSIZE - remainder)) 17830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 
17840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._extfileobj: 17850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.close() 17860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closed = True 17870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getmember(self, name): 17890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return a TarInfo object for member `name'. If `name' can not be 17900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao found in the archive, KeyError is raised. If a member occurs more 17910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao than once in the archive, its last occurrence is assumed to be the 17920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao most up-to-date version. 17930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 17940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self._getmember(name) 17950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo is None: 17960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise KeyError("filename %r not found" % name) 17970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return tarinfo 17980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 17990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getmembers(self): 18000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the members of the archive as a list of TarInfo objects. The 18010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao list has the same order as the members in the archive. 18020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 18030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check() 18040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._loaded: # if we want to obtain a list of 18050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._load() # all members, we first have to 18060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # scan the whole archive. 
18070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.members 18080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getnames(self): 18100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the members of the archive as a list of their names. It has 18110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao the same order as the list returned by getmembers(). 18120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 18130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return [tarinfo.name for tarinfo in self.getmembers()] 18140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def gettarinfo(self, name=None, arcname=None, fileobj=None): 18160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Create a TarInfo object for either the file `name' or the file 18170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao object `fileobj' (using os.fstat on its file descriptor). You can 18180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao modify some of the TarInfo's attributes before you add it using 18190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao addfile(). If given, `arcname' specifies an alternative name for the 18200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file in the archive. 18210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 18220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check("aw") 18230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # When fileobj is given, replace name by 18250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # fileobj's real name. 18260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fileobj is not None: 18270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = fileobj.name 18280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Building the name of the member in the archive. 
18300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Backward slashes are converted to forward slashes, 18310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Absolute paths are turned to relative paths. 18320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if arcname is None: 18330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = name 18340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao drv, arcname = os.path.splitdrive(arcname) 18350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = arcname.replace(os.sep, "/") 18360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = arcname.lstrip("/") 18370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Now, fill the TarInfo object with 18390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # information specific for the file. 18400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.tarinfo() 18410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.tarfile = self 18420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Use os.stat or os.lstat, depending on platform 18440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and if symlinks shall be resolved. 
18450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fileobj is None: 18460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(os, "lstat") and not self.dereference: 18470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao statres = os.lstat(name) 18480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 18490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao statres = os.stat(name) 18500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 18510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao statres = os.fstat(fileobj.fileno()) 18520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao linkname = "" 18530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stmd = statres.st_mode 18550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if stat.S_ISREG(stmd): 18560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao inode = (statres.st_ino, statres.st_dev) 18570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self.dereference and statres.st_nlink > 1 and \ 18580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao inode in self.inodes and arcname != self.inodes[inode]: 18590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Is it a hardlink to an already 18600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # archived file? 18610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = LNKTYPE 18620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao linkname = self.inodes[inode] 18630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 18640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # The inode is added only if its valid. 18650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # For win32 it is always 0. 
18660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = REGTYPE 18670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if inode[0]: 18680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.inodes[inode] = arcname 18690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif stat.S_ISDIR(stmd): 18700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = DIRTYPE 18710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif stat.S_ISFIFO(stmd): 18720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = FIFOTYPE 18730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif stat.S_ISLNK(stmd): 18740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = SYMTYPE 18750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao linkname = os.readlink(name) 18760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif stat.S_ISCHR(stmd): 18770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = CHRTYPE 18780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif stat.S_ISBLK(stmd): 18790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao type = BLKTYPE 18800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 18810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None 18820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 18830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Fill the TarInfo object with all 18840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # information we can get. 
18850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.name = arcname 18860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.mode = stmd 18870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.uid = statres.st_uid 18880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.gid = statres.st_gid 18890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if type == REGTYPE: 18900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.size = statres.st_size 18910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 18920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.size = 0L 18930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.mtime = statres.st_mtime 18940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.type = type 18950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.linkname = linkname 18960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if pwd: 18970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 18980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] 18990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except KeyError: 19000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 19010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if grp: 19020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 19030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] 19040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except KeyError: 19050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 19060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if type in (CHRTYPE, BLKTYPE): 19080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(os, "major") and hasattr(os, "minor"): 19090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.devmajor = os.major(statres.st_rdev) 19100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.devminor = os.minor(statres.st_rdev) 19110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 
tarinfo 19120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def list(self, verbose=True): 19140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Print a table of contents to sys.stdout. If `verbose' is False, only 19150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao the names of the members are printed. If it is True, an `ls -l'-like 19160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao output is produced. 19170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 19180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check() 19190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for tarinfo in self: 19210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if verbose: 19220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print filemode(tarinfo.mode), 19230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "%s/%s" % (tarinfo.uname or tarinfo.uid, 19240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.gname or tarinfo.gid), 19250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.ischr() or tarinfo.isblk(): 19260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "%10s" % ("%d,%d" \ 19270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao % (tarinfo.devmajor, tarinfo.devminor)), 19280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 19290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "%10d" % tarinfo.size, 19300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "%d-%02d-%02d %02d:%02d:%02d" \ 19310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao % time.localtime(tarinfo.mtime)[:6], 19320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print tarinfo.name + ("/" if tarinfo.isdir() else ""), 19340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if verbose: 19360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.issym(): 
19370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "->", tarinfo.linkname, 19380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.islnk(): 19390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print "link to", tarinfo.linkname, 19400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print 19410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): 19430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Add the file `name' to the archive. `name' may be any type of file 19440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (directory, fifo, symbolic link, etc.). If given, `arcname' 19450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao specifies an alternative name for the file in the archive. 19460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao Directories are added recursively by default. This can be avoided by 19470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao setting `recursive' to False. `exclude' is a function that should 19480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return True for each filename to be excluded. `filter' is a function 19490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao that expects a TarInfo object argument and returns the changed 19500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TarInfo object, if it returns None the TarInfo object will be 19510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao excluded from the archive. 19520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 19530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check("aw") 19540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if arcname is None: 19560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao arcname = name 19570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Exclude pathnames. 
19590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if exclude is not None: 19600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import warnings 19610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnings.warn("use the filter argument instead", 19620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao DeprecationWarning, 2) 19630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if exclude(name): 19640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(2, "tarfile: Excluded %r" % name) 19650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 19660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Skip if somebody tries to archive the archive... 19680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.name is not None and os.path.abspath(name) == self.name: 19690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(2, "tarfile: Skipped %r" % name) 19700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 19710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, name) 19730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create a TarInfo object from the file. 19750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.gettarinfo(name, arcname) 19760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo is None: 19780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, "tarfile: Unsupported type %r" % name) 19790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 19800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Change or exclude the TarInfo object. 
19820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if filter is not None: 19830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = filter(tarinfo) 19840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo is None: 19850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(2, "tarfile: Excluded %r" % name) 19860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 19870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Append the tar header and data to the archive. 19890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.isreg(): 19900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with bltn_open(name, "rb") as f: 19910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.addfile(tarinfo, f) 19920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 19930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.isdir(): 19940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.addfile(tarinfo) 19950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if recursive: 19960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for f in os.listdir(name): 19970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.add(os.path.join(name, f), os.path.join(arcname, f), 19980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao recursive, exclude, filter) 19990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 20010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.addfile(tarinfo) 20020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def addfile(self, tarinfo, fileobj=None): 20040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is 20050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao given, tarinfo.size bytes are read from it and added to the archive. 20060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao You can create TarInfo objects using gettarinfo(). 
20070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao On Windows platforms, `fileobj' should always be opened with mode 20080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 'rb' to avoid irritation about the file size. 20090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 20100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check("aw") 20110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = copy.copy(tarinfo) 20130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao buf = tarinfo.tobuf(self.format, self.encoding, self.errors) 20150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.write(buf) 20160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset += len(buf) 20170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If there's data to follow, append it. 20190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if fileobj is not None: 20200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao copyfileobj(fileobj, self.fileobj, tarinfo.size) 20210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 20220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if remainder > 0: 20230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 20240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao blocks += 1 20250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset += blocks * BLOCKSIZE 20260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.members.append(tarinfo) 20280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def extractall(self, path=".", members=None): 20300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Extract all members from the archive to the current working 20310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directory 
and set owner, modification time and permissions on 20320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directories afterwards. `path' specifies a different directory 20330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao to extract to. `members' is optional and must be a subset of the 20340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao list returned by getmembers(). 20350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 20360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directories = [] 20370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if members is None: 20390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao members = self 20400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for tarinfo in members: 20420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.isdir(): 20430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Extract directories with a safe mode. 20440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directories.append(tarinfo) 20450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = copy.copy(tarinfo) 20460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo.mode = 0700 20470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.extract(tarinfo, path) 20480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Reverse sort directories. 20500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directories.sort(key=operator.attrgetter('name')) 20510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao directories.reverse() 20520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Set correct owner, mtime and filemode on directories. 
20540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for tarinfo in directories: 20550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao dirpath = os.path.join(path, tarinfo.name) 20560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 20570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.chown(tarinfo, dirpath) 20580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.utime(tarinfo, dirpath) 20590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.chmod(tarinfo, dirpath) 20600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ExtractError, e: 20610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.errorlevel > 1: 20620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 20630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 20640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, "tarfile: %s" % e) 20650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def extract(self, member, path=""): 20670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Extract a member from the archive to the current working directory, 20680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao using its full name. Its file information is extracted as accurately 20690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao as possible. `member' may be a filename or a TarInfo object. You can 20700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao specify a different directory using `path'. 
20710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 20720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check("r") 20730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(member, basestring): 20750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.getmember(member) 20760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 20770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = member 20780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Prepare the link target for makelink(). 20800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.islnk(): 20810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo._link_target = os.path.join(path, tarinfo.linkname) 20820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 20840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) 20850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EnvironmentError, e: 20860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.errorlevel > 0: 20870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 20880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 20890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if e.filename is None: 20900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, "tarfile: %s" % e.strerror) 20910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 20920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) 20930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ExtractError, e: 20940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.errorlevel > 1: 20950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 20960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 20970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, "tarfile: %s" % e) 
20980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 20990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def extractfile(self, member): 21000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Extract a member from the archive as a file object. `member' may be 21010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao a filename or a TarInfo object. If `member' is a regular file, a 21020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file-like object is returned. If `member' is a link, a file-like 21030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao object is constructed from the link's target. If `member' is none of 21040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao the above, None is returned. 21050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao The file-like object is read-only and provides the following 21060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao methods: read(), readline(), readlines(), seek() and tell() 21070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 21080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check("r") 21090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(member, basestring): 21110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.getmember(member) 21120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 21130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = member 21140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.isreg(): 21160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.fileobject(self, tarinfo) 21170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.type not in SUPPORTED_TYPES: 21190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If a member's type is unknown, it is treated as a 21200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # regular file. 
21210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.fileobject(self, tarinfo) 21220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.islnk() or tarinfo.issym(): 21240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if isinstance(self.fileobj, _Stream): 21250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # A small but ugly workaround for the case that someone tries 21260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # to extract a (sym)link as a file-object from a non-seekable 21270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # stream of tar blocks. 21280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise StreamError("cannot extract (sym)link as file object") 21290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 21300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # A (sym)link's file object is its target's file object. 21310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.extractfile(self._find_link_target(tarinfo)) 21320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 21330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # If there's no data associated with the member (directory, chrdev, 21340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # blkdev, etc.), return None instead of a file object. 21350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None 21360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _extract_member(self, tarinfo, targetpath): 21380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Extract the TarInfo object tarinfo to a physical 21390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao file called targetpath. 
21400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 21410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Fetch the TarInfo object for the given name 21420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # and build the destination pathname, replacing 21430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # forward slashes to platform specific separators. 21440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao targetpath = targetpath.rstrip("/") 21450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao targetpath = targetpath.replace("/", os.sep) 21460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create all upper directories. 21480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao upperdirs = os.path.dirname(targetpath) 21490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if upperdirs and not os.path.exists(upperdirs): 21500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Create directories that are not part of the archive with 21510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # default permissions. 
21520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.makedirs(upperdirs) 21530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.islnk() or tarinfo.issym(): 21550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) 21560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 21570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, tarinfo.name) 21580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.isreg(): 21600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makefile(tarinfo, targetpath) 21610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.isdir(): 21620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makedir(tarinfo, targetpath) 21630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.isfifo(): 21640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makefifo(tarinfo, targetpath) 21650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.ischr() or tarinfo.isblk(): 21660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makedev(tarinfo, targetpath) 21670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.islnk() or tarinfo.issym(): 21680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makelink(tarinfo, targetpath) 21690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif tarinfo.type not in SUPPORTED_TYPES: 21700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makeunknown(tarinfo, targetpath) 21710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 21720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makefile(tarinfo, targetpath) 21730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.chown(tarinfo, targetpath) 21750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not tarinfo.issym(): 21760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.chmod(tarinfo, targetpath) 
21770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.utime(tarinfo, targetpath) 21780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #-------------------------------------------------------------------------- 21800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Below are the different file methods. They are called via 21810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # _extract_member() when extract() is called. They can be replaced in a 21820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # subclass to implement other functionality. 21830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def makedir(self, tarinfo, targetpath): 21850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Make a directory called targetpath. 21860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 21870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 21880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Use a safe mode for the directory, the real mode is set 21890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # later in _extract_member(). 21900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.mkdir(targetpath, 0700) 21910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EnvironmentError, e: 21920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if e.errno != errno.EEXIST: 21930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise 21940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 21950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def makefile(self, tarinfo, targetpath): 21960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Make a file called targetpath. 
21970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 21980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao source = self.extractfile(tarinfo) 21990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 22000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao with bltn_open(targetpath, "wb") as target: 22010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao copyfileobj(source, target) 22020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao finally: 22030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao source.close() 22040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def makeunknown(self, tarinfo, targetpath): 22060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Make a file from a TarInfo object with an unknown type 22070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao at targetpath. 22080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 22090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.makefile(tarinfo, targetpath) 22100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(1, "tarfile: Unknown file type %r, " \ 22110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao "extracted as regular file." % tarinfo.type) 22120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def makefifo(self, tarinfo, targetpath): 22140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Make a fifo called targetpath. 
22150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 22160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(os, "mkfifo"): 22170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.mkfifo(targetpath) 22180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 22190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ExtractError("fifo not supported by system") 22200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def makedev(self, tarinfo, targetpath): 22220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Make a character or block device called targetpath. 22230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 22240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 22250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ExtractError("special devices not supported by system") 22260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode = tarinfo.mode 22280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.isblk(): 22290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode |= stat.S_IFBLK 22300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 22310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao mode |= stat.S_IFCHR 22320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.mknod(targetpath, mode, 22340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.makedev(tarinfo.devmajor, tarinfo.devminor)) 22350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def makelink(self, tarinfo, targetpath): 22370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Make a (symbolic) link called targetpath. If it cannot be created 22380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao (platform limitation), we try to make a copy of the referenced file 22390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao instead of a link. 
22400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 22410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(os, "symlink") and hasattr(os, "link"): 22420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # For systems that support symbolic and hard links. 22430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.issym(): 22440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.lexists(targetpath): 22450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.unlink(targetpath) 22460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.symlink(tarinfo.linkname, targetpath) 22470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 22480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # See extract(). 22490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.exists(tarinfo._link_target): 22500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if os.path.lexists(targetpath): 22510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.unlink(targetpath) 22520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.link(tarinfo._link_target, targetpath) 22530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 22540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._extract_member(self._find_link_target(tarinfo), targetpath) 22550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 22560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 22570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._extract_member(self._find_link_target(tarinfo), targetpath) 22580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except KeyError: 22590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ExtractError("unable to resolve link inside archive") 22600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def chown(self, tarinfo, targetpath): 22620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Set owner of targetpath according to tarinfo. 
22630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 22640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: 22650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # We have to be root to do so. 22660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 22670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao g = grp.getgrnam(tarinfo.gname)[2] 22680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except KeyError: 22690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao g = tarinfo.gid 22700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 22710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u = pwd.getpwnam(tarinfo.uname)[2] 22720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except KeyError: 22730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao u = tarinfo.uid 22740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 22750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.issym() and hasattr(os, "lchown"): 22760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.lchown(targetpath, u, g) 22770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 22780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if sys.platform != "os2emx": 22790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.chown(targetpath, u, g) 22800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EnvironmentError, e: 22810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ExtractError("could not change owner") 22820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def chmod(self, tarinfo, targetpath): 22840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Set file permissions of targetpath according to tarinfo. 
22850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 22860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if hasattr(os, 'chmod'): 22870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 22880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.chmod(targetpath, tarinfo.mode) 22890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EnvironmentError, e: 22900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ExtractError("could not change mode") 22910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 22920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def utime(self, tarinfo, targetpath): 22930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Set modification time of targetpath according to tarinfo. 22940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 22950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not hasattr(os, 'utime'): 22960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 22970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 22980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) 22990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EnvironmentError, e: 23000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ExtractError("could not change modification time") 23010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #-------------------------------------------------------------------------- 23030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def next(self): 23040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the next member of the archive as a TarInfo object, when 23050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao TarFile is opened for reading. Return None if there is no more 23060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao available. 
23070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 23080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check("ra") 23090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.firstmember is not None: 23100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao m = self.firstmember 23110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.firstmember = None 23120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return m 23130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Read the next block. 23150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.seek(self.offset) 23160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = None 23170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 23180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 23190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.tarinfo.fromtarfile(self) 23200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EOFHeaderError, e: 23210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.ignore_zeros: 23220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(2, "0x%X: %s" % (self.offset, e)) 23230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset += BLOCKSIZE 23240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 23250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except InvalidHeaderError, e: 23260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.ignore_zeros: 23270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._dbg(2, "0x%X: %s" % (self.offset, e)) 23280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset += BLOCKSIZE 23290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao continue 23300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif self.offset == 0: 23310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError(str(e)) 23320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except EmptyHeaderError: 23330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.offset == 0: 
23340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError("empty file") 23350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except TruncatedHeaderError, e: 23360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.offset == 0: 23370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError(str(e)) 23380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except SubsequentHeaderError, e: 23390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ReadError(str(e)) 23400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 23410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo is not None: 23430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.members.append(tarinfo) 23440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 23450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._loaded = True 23460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return tarinfo 23480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao #-------------------------------------------------------------------------- 23500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Little helper methods: 23510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _getmember(self, name, tarinfo=None, normalize=False): 23530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Find an archive member by name from bottom to top. 23540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao If tarinfo is given, it is used as the starting point. 23550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 23560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Ensure that all members have been loaded. 
23570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao members = self.getmembers() 23580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Limit the member search list up to tarinfo. 23600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo is not None: 23610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao members = members[:members.index(tarinfo)] 23620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if normalize: 23640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao name = os.path.normpath(name) 23650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for member in reversed(members): 23670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if normalize: 23680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao member_name = os.path.normpath(member.name) 23690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 23700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao member_name = member.name 23710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if name == member_name: 23730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return member 23740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _load(self): 23760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Read through the entire archive file and look for readable 23770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao members. 
23780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 23790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 23800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.next() 23810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo is None: 23820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 23830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._loaded = True 23840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _check(self, mode=None): 23860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Check if TarFile is still open, and if the operation's mode 23870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao corresponds to TarFile's mode. 23880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 23890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.closed: 23900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError("%s is closed" % self.__class__.__name__) 23910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mode is not None and self.mode not in mode: 23920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise IOError("bad operation for mode %r" % self.mode) 23930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 23940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _find_link_target(self, tarinfo): 23950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Find the target member of a symlink or hardlink member in the 23960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao archive. 23970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 23980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if tarinfo.issym(): 23990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Always search the entire archive. 
24000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) 24010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao limit = None 24020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 24030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Search the archive before the link, because a hard link is 24040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # just a reference to an already archived file. 24050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao linkname = tarinfo.linkname 24060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao limit = tarinfo 24070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao member = self._getmember(linkname, tarinfo=limit, normalize=True) 24090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if member is None: 24100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise KeyError("linkname %r not found" % linkname) 24110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return member 24120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __iter__(self): 24140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Provide an iterator object. 24150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self._loaded: 24170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return iter(self.members) 24180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 24190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return TarIter(self) 24200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def _dbg(self, level, msg): 24220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Write debugging output to sys.stderr. 
24230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if level <= self.debug: 24250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao print >> sys.stderr, msg 24260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __enter__(self): 24280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self._check() 24290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self 24300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __exit__(self, type, value, traceback): 24320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if type is None: 24330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.close() 24340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 24350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # An exception occurred. We must not call close() because 24360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # it would try to write end-of-archive blocks and padding. 24370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not self._extfileobj: 24380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.fileobj.close() 24390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.closed = True 24400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# class TarFile 24410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24420a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass TarIter: 24430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Iterator Class. 24440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for tarinfo in TarFile(...): 24460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao suite... 24470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, tarfile): 24500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Construct a TarIter object. 
24510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile = tarfile 24530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.index = 0 24540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __iter__(self): 24550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return iterator object. 24560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self 24580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def next(self): 24590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Return the next item using TarFile's next() method. 24600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao When all members have been read, set TarFile as _loaded. 24610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # Fix for SF #1100429: Under rare circumstances it can 24630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # happen that getmembers() is called during iteration, 24640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # which will cause TarIter to stop prematurely. 
24650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if self.index == 0 and self.tarfile.firstmember is not None: 24670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.tarfile.next() 24680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif self.index < len(self.tarfile.members): 24690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.tarfile.members[self.index] 24700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif not self.tarfile._loaded: 24710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tarinfo = self.tarfile.next() 24720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if not tarinfo: 24730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile._loaded = True 24740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise StopIteration 24750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 24760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise StopIteration 24770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.index += 1 24780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return tarinfo 24790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Helper classes for sparse file support 24810a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _section: 24820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Base class for _data and _hole. 
24830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, offset, size): 24850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.offset = offset 24860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.size = size 24870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __contains__(self, offset): 24880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.offset <= offset < self.offset + self.size 24890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24900a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _data(_section): 24910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Represent a data section in a sparse file. 24920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 24930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, offset, size, realpos): 24940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao _section.__init__(self, offset, size) 24950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.realpos = realpos 24960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 24970a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _hole(_section): 24980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Represent a hole section in a sparse file. 24990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 25000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao pass 25010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 25020a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _ringbuffer(list): 25030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """Ringbuffer class which increases performance 25040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao over a regular list. 
25050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 25060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self): 25070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.idx = 0 25080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def find(self, offset): 25090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao idx = self.idx 25100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao while True: 25110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao item = self[idx] 25120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if offset in item: 25130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao break 25140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao idx += 1 25150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if idx == len(self): 25160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao idx = 0 25170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if idx == self.idx: 25180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao # End of File 25190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return None 25200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.idx = idx 25210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return item 25220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 25230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------------------------------------------- 25240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# zipfile compatible TarFile class 25250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------------------------------------------- 25260a8c90248264a8b26970b4473770bcc3df8515fJosh GaoTAR_PLAIN = 0 # zipfile.ZIP_STORED 25270a8c90248264a8b26970b4473770bcc3df8515fJosh GaoTAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED 25280a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass TarFileCompat: 25290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """TarFile class compatible with standard module zipfile's 25300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao ZipFile class. 
25310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao """ 25320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def __init__(self, file, mode="r", compression=TAR_PLAIN): 25330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from warnings import warnpy3k 25340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao warnpy3k("the TarFileCompat class has been removed in Python 3.0", 25350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao stacklevel=2) 25360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if compression == TAR_PLAIN: 25370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile = TarFile.taropen(file, mode) 25380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao elif compression == TAR_GZIPPED: 25390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile = TarFile.gzopen(file, mode) 25400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao else: 25410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao raise ValueError("unknown compression constant") 25420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao if mode[0:1] == "r": 25430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao members = self.tarfile.getmembers() 25440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao for m in members: 25450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao m.filename = m.name 25460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao m.file_size = m.size 25470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao m.date_time = time.gmtime(m.mtime)[:6] 25480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def namelist(self): 25490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return map(lambda m: m.name, self.infolist()) 25500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def infolist(self): 25510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return filter(lambda m: m.type in REGULAR_TYPES, 25520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile.getmembers()) 25530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def printdir(self): 25540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile.list() 
25550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def testzip(self): 25560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return 25570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def getinfo(self, name): 25580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.tarfile.getmember(name) 25590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def read(self, name): 25600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao return self.tarfile.extractfile(self.tarfile.getmember(name)).read() 25610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def write(self, filename, arcname=None, compress_type=None): 25620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile.add(filename, arcname) 25630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def writestr(self, zinfo, bytes): 25640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao try: 25650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from cStringIO import StringIO 25660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao except ImportError: 25670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao from StringIO import StringIO 25680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao import calendar 25690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tinfo = TarInfo(zinfo.filename) 25700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tinfo.size = len(bytes) 25710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao tinfo.mtime = calendar.timegm(zinfo.date_time) 25720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile.addfile(tinfo, StringIO(bytes)) 25730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao def close(self): 25740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao self.tarfile.close() 25750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#class TarFileCompat 25760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao 25770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#-------------------- 25780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# exported functions 25790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#-------------------- 
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       Only TarError is turned into a False result; any other
       exception raised while opening or closing propagates.
    """
    # The global name "open" is rebound to TarFile.open below, so that
    # is what gets called here once the module has been loaded.
    try:
        open(name).close()
    except TarError:
        return False
    return True

# Keep the previous binding of open reachable before rebinding the name.
bltn_open = open
open = TarFile.open