10a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#!/usr/bin/env python
20a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# -*- coding: iso-8859-1 -*-
30a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#-------------------------------------------------------------------
40a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# tarfile.py
50a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
70a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# All rights reserved.
80a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
90a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Permission  is  hereby granted,  free  of charge,  to  any person
100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# obtaining a  copy of  this software  and associated documentation
110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# files  (the  "Software"),  to   deal  in  the  Software   without
120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# restriction,  including  without limitation  the  rights to  use,
130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# copy, modify, merge, publish, distribute, sublicense, and/or sell
140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# copies  of  the  Software,  and to  permit  persons  to  whom the
150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Software  is  furnished  to  do  so,  subject  to  the  following
160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# conditions:
170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# The above copyright  notice and this  permission notice shall  be
190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# included in all copies or substantial portions of the Software.
200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# OTHER DEALINGS IN THE SOFTWARE.
290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"""Read from and write to tar format archives.
310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"""
320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Module metadata.  The "$...$" values are version-control keyword
# placeholders and are kept verbatim.
__version__ = "$Revision: 85213 $"
# $Source$

version     = "0.9.0"
# The a-umlaut in the names below is written as the escape "\xe4" (its
# iso-8859-1 code, matching the coding declaration at the top of the
# file) so the value does not depend on how the source is decoded.
__author__  = "Lars Gust\xe4bel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gust\xe4bel, Richard Townsend."
410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------
430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Imports
440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------
450a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport sys
460a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport os
470a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport shutil
480a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport stat
490a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport errno
500a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport time
510a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport struct
520a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport copy
530a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport re
540a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport operator
550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
560a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry:
570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    import grp, pwd
580a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept ImportError:
590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    grp = pwd = None
600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# from tarfile import *
# Names exported by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
]
630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# tar constants
660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
# Basic sizes and magic strings.
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE     # length of records (20 blocks)
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

# Maximum lengths of the header's string fields.
LENGTH_NAME = 100               # filename
LENGTH_LINK = 100               # linkname
LENGTH_PREFIX = 155             # prefix field

# Type flags of archive members.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# GNU tar extensions.
GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

# Extended header types.
XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header fields that hold numbers, mapped to the type used to
# convert them; every field not listed here is treated as a string.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Bits used in the mode field, values in octal.
1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
# The "0o" octal prefix is accepted by Python 2.6+ and Python 3; the
# legacy form with a bare leading zero is a syntax error on Python 3.
S_IFLNK = 0o120000       # symbolic link
S_IFREG = 0o100000       # regular file
S_IFBLK = 0o060000       # block device
S_IFDIR = 0o040000       # directory
S_IFCHR = 0o020000       # character device
S_IFIFO = 0o010000       # fifo

TSUID   = 0o4000         # set UID on execution
TSGID   = 0o2000         # set GID on execution
TSVTX   = 0o1000         # reserved

TUREAD  = 0o400          # read by owner
TUWRITE = 0o200          # write by owner
TUEXEC  = 0o100          # execute/search by owner
TGREAD  = 0o040          # read by group
TGWRITE = 0o020          # write by group
TGEXEC  = 0o010          # execute/search by group
TOREAD  = 0o004          # read by other
TOWRITE = 0o002          # write by other
TOEXEC  = 0o001          # execute/search by other
1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# initialization
1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
# Default encoding: the file system encoding, or the interpreter's
# default encoding when the platform does not report one.
ENCODING = sys.getfilesystemencoding()
ENCODING = sys.getdefaultencoding() if ENCODING is None else ENCODING
1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Some useful functions
1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------------------
1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def stn(s, length):
    """Return s as a field of exactly length characters: cut off after
       length characters if it is longer, filled up with NULs if it is
       shorter.
    """
    # A negative repeat count yields an empty string, so nothing is
    # appended when s already fills the field.
    padding = NUL * (length - len(s))
    return s[:length] + padding
1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def nts(s):
    """Return the part of a field that precedes its first null char,
       or the whole field if it contains none.
    """
    end = s.find("\0")
    return s if end < 0 else s[:end]
1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def nti(s):
    """Convert a number field to a python number.

       There are two possible encodings for a number field (see itn()):
       a null-terminated string of octal digits, or, if the first byte
       is 0o200, a big-endian base-256 number in the remaining bytes.

       Raises InvalidHeaderError if an octal field cannot be parsed.
    """
    if s[0] != chr(0o200):
        try:
            # An empty field, or one consisting only of blanks, counts
            # as zero instead of being rejected as invalid.
            n = int(nts(s).strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        # Skip the marker byte and accumulate the rest, most
        # significant byte first.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
    return n
1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits characters.

       POSIX 1003.1-1988 requires numbers to be encoded as a string of
       octal digits followed by a null-byte, this allows values up to
       (8**(digits-1))-1. GNU tar allows storing numbers greater than
       that if necessary. A leading 0o200 byte indicates this particular
       encoding, the following digits-1 bytes are a big-endian
       representation. This allows values up to (256**(digits-1))-1.

       Raises ValueError if the number does not fit the octal form and
       format is not GNU_FORMAT, or if it is too large for the
       base-256 form.
    """
    # Callers may hand in floats (e.g. timestamps); the field can only
    # hold integers and the bit operations below require one.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = "%0*o" % (digits - 1, n) + NUL
    else:
        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise OverflowError.
            n = struct.unpack("L", struct.pack("l", n))[0]

        # Emit the low digits-1 bytes, most significant first, behind
        # the 0o200 marker byte.
        s = ""
        for i in range(digits - 1):
            s = chr(n & 0o377) + s
            n >>= 8
        s = chr(0o200) + s
    return s
2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def uts(s, encoding, errors):
    """Encode the unicode object s with the given encoding and error
       handler and return the resulting string.

       Besides the regular codec error handlers, errors may be
       "utf-8": an extra handler similar to the -o invalid=UTF-8
       option in POSIX.1-2001 that replaces untranslatable characters
       with their UTF-8 representation.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # At least one character does not fit the target encoding: encode
    # character by character and use UTF-8 for the ones that fail.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def calc_chksums(buf):
    """Return the checksums (unsigned, signed) of a member's header.

       All characters of the header are summed up except for the
       chksum field (bytes 148 to 155), which is treated as if it was
       filled with spaces. According to the GNU tar sources, some tars
       (Sun and NeXT) calculate chksum with signed char, which gives a
       different result if there are chars in the buffer with the high
       bit set. That is why both variants are calculated.
    """
    before = buf[:148]
    after = buf[156:512]
    # 256 is the contribution of the chksum field itself: 8 spaces
    # with the value 32 each.
    unsigned_chksum = 256 + sum(struct.unpack("148B", before) + struct.unpack("356B", after))
    signed_chksum = 256 + sum(struct.unpack("148b", before) + struct.unpack("356b", after))
    return unsigned_chksum, signed_chksum
2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def copyfileobj(src, dst, length=None, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

       If length is None, copy the entire content. If length is 0,
       nothing is copied. bufsize is the number of bytes moved per
       read/write call; it defaults to 16 KiB when length is given
       and to shutil's own default when the entire content is copied.

       Raises IOError if src ends before length bytes were read.
    """
    if length == 0:
        return
    if length is None:
        if bufsize:
            shutil.copyfileobj(src, dst, bufsize)
        else:
            shutil.copyfileobj(src, dst)
        return

    bufsize = bufsize or 16 * 1024
    blocks, remainder = divmod(length, bufsize)

    # Count the full-sized chunks down instead of iterating over a
    # range so that no list is built for very large lengths.
    while blocks > 0:
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise IOError("end of file reached")
        dst.write(buf)
        blocks -= 1

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise IOError("end of file reached")
        dst.write(buf)
    return
2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Lookup table for filemode(): one inner tuple per character of the
# resulting string. Within an inner tuple the first entry whose bits
# are all set in the mode supplies the character, which is why
# combined masks are listed before the single bits they contain.
filemode_table = (
    # file type
    (
        (S_IFLNK, "l"),
        (S_IFREG, "-"),
        (S_IFBLK, "b"),
        (S_IFDIR, "d"),
        (S_IFCHR, "c"),
        (S_IFIFO, "p"),
    ),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    (
        (TUEXEC | TSUID, "s"),
        (TSUID, "S"),
        (TUEXEC, "x"),
    ),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    (
        (TGEXEC | TSGID, "s"),
        (TSGID, "S"),
        (TGEXEC, "x"),
    ),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    (
        (TOEXEC | TSVTX, "t"),
        (TSVTX, "T"),
        (TOEXEC, "x"),
    ),
)
3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" stands for a position where none of the candidates match.
        symbol = "-"
        for mask, letter in candidates:
            if mode & mask == mask:
                symbol = letter
                break
        chars.append(symbol)
    return "".join(chars)
3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------
3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# internal stream interface
3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------
3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass _LowLevelFile:
3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Low-level file object. Supports reading and writing.
3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao       It is used instead of a regular file object for streaming
3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao       access.
3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """
3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, name, mode):
3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        mode = {
3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "r": os.O_RDONLY,
3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        }[mode]
3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(os, "O_BINARY"):
3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            mode |= os.O_BINARY
3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.fd = os.open(name, mode, 0666)
3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def close(self):
3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        os.close(self.fd)
3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def read(self, size):
3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return os.read(self.fd, size)
3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def write(self, s):
3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        os.write(self.fd, s)
3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- path of the file to open when fileobj is None;
                       it is also stored in the gzip header when a
                       "gz" stream is written.
           mode     -- "r" to read, "w" to write.
           comptype -- "tar" (no compression), "gz", "bz2", or "*" to
                       detect the compression from the first block.
           fileobj  -- stream-like object to use, or None to open name.
                       An object supplied by the caller is never closed
                       by the _Stream.
           bufsize  -- size of the blocks exchanged with fileobj.

           Raises CompressionError if the module needed for comptype
           cannot be imported.  If setting up the compression fails, a
           file opened here is closed again before the error propagates.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("") & 0xffffffff
                # zlib reports corrupt input with its own exception type.
                self.exception = zlib.error
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                # The bz2 decompressor reports corrupt input as IOError.
                self.exception = IOError
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except BaseException:
            # Do not leak a descriptor we opened ourselves, and make
            # sure __del__ does not try to close a half-built stream.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # __init__ may have failed before self.closed was assigned.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: produce a raw deflate stream, the gzip
        # header and trailer are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        # magic, deflate method, FNAME flag, mtime, extra flags, OS byte
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if isinstance(self.name, unicode):
            self.name = self.name.encode("iso-8859-1", "replace")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            # The gzip trailer carries the CRC of the uncompressed data.
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffff
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.

           Pending output is flushed first.  The stream counts as
           closed, and a file opened by the _Stream itself is closed,
           even when that final flush fails.
        """
        if self.closed:
            return

        # Mark the stream closed up front so that a failing flush is
        # not attempted a second time from __del__.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = ""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Parses and skips the gzip header.  Raises ReadError if the
           magic number is wrong and CompressionError if the stream
           does not use the deflate method.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        # Skip modification time, extra flags and OS byte.
        self.__read(6)

        if flag & 4:
            # FEXTRA: a length-prefixed field that is part of the
            # uncompressed header.  It has to be skipped with the raw
            # __read(); read() would feed it to the decompressor and
            # advance the stream position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 16-bit header checksum, not verified.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Returns the new position; raises StreamError when pos lies
           before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for _ in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           For compressed streams the data is decompressed first;
           surplus decompressed data is kept in self.dbuf for the next
           call.  Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                # zlib.error for "gz", IOError for "bz2" (see __init__).
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
6150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first block of the wrapped file object is read ahead so
       its magic bytes can be inspected; it is handed back unchanged
       by the first read() call.
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first BLOCKSIZE bytes ahead.
        """
        self.fileobj = fileobj
        self.buf = fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead, ignoring size.

           After this first call the instance's read attribute is the
           wrapped object's read(), so later calls go straight through.
        """
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes
           at the start of the block that was read ahead.
        """
        magic = self.buf
        if magic.startswith("\037\213\010"):
            detected = "gz"
        elif magic[0:3] == "BZh" and magic[4:10] == "1AY&SY":
            detected = "bz2"
        else:
            detected = "tar"
        return detected

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class _StreamProxy
6390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched from fileobj per read.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing (any
           other mode) bzip2 data.
        """
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start the proxy at position 0 with a fresh bz2
           compressor or decompressor.  In read mode the wrapped
           file is rewound and the buffer is emptied.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = ""

    def read(self, size):
        """Return up to size bytes of decompressed data; fewer are
           returned only when the wrapped file is exhausted.
        """
        pieces = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            chunk = self.bz2obj.decompress(raw)
            pieces.append(chunk)
            available += len(chunk)
        joined = "".join(pieces)

        result = joined[:size]
        # Keep whatever was decompressed beyond size for the next call.
        self.buf = joined[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to the uncompressed position pos.  Going backwards
           restarts decompression from the beginning of the file.
        """
        going_back = pos < self.pos
        if going_back:
            self.init()
        distance = pos - self.pos
        self.read(distance)

    def tell(self):
        """Return the current uncompressed position.
        """
        return self.pos

    def write(self, data):
        """Compress data and pass the result on to the wrapped file.
        """
        self.pos += len(data)
        compressed = self.bz2obj.compress(data)
        self.fileobj.write(compressed)

    def close(self):
        """In write mode, flush the compressor into the wrapped file.
           The wrapped file itself is not closed here.
        """
        if self.mode == "w":
            tail = self.bz2obj.flush()
            self.fileobj.write(tail)
# class _BZ2Proxy
7010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#------------------------
7030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Extraction file object
7040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj is the underlying file object, offset the position
           in it where the member's data starts, size the length of
           the member, and sparse an optional map of sparse sections
           (None for a regular file).
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.

           At most size bytes are returned, and never more than what
           is left of the member.  If size is None or negative, all
           remaining data is returned.  Reading at or beyond the end
           of the member returns an empty string.
        """
        # Never let a negative count reach the underlying file object:
        # it would read to the end of the whole archive, not of this
        # member.
        remaining = max(self.size - self.position, 0)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = []
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)

        if section is None:
            return ""

        # Do not read across the end of the current section.
        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            # Data section: translate the logical position into the
            # position of the stored data.
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            # Any other section is returned as a run of NUL bytes.
            self.position += size
            return NUL * size
#class _FileInFile
7790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
7800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       All reads go through a _FileInFile wrapper around the archive's
       file object. readline() may read more than one line at a time;
       the surplus is kept in self.buffer and handed out by later
       read()/readline() calls. self.position is the offset that
       tell() reports.
    """
    # Number of bytes readline() requests per refill of self.buffer.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member tarinfo of the archive
           tarfile. Uses tarfile.fileobj, tarinfo.offset_data,
           tarinfo.size and, if present, tarinfo.sparse.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file object is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "read everything", as it does for
        # ordinary file objects. Without this normalisation the value
        # would be used as a slice bound below and data left over
        # from readline() would be cut short.
        if size is not None and size < 0:
            size = None

        # Serve data that readline() already pulled in first.
        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        # Then fetch whatever is still missing from the underlying file.
        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. A negative size
           or None means no limit.

           Raises ValueError if the file object is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Refill the buffer block by block until a newline shows
            # up or the underlying file returns no more data.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line; any other value
        # must not end up as a slice bound.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

           Raises ValueError if the file object is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           whence is one of os.SEEK_SET, os.SEEK_CUR or os.SEEK_END.
           The resulting position is clamped to the range
           0..self.size. Raises ValueError if the file object is
           closed or whence is not one of the three constants.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered read-ahead belongs to the old position; drop it.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
9080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#------------------
9100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Exported Classes
9110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#------------------
9120a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass TarInfo(object):
9130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """Informational class which holds the details about an
9140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao       archive member given by a tar header block.
9150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao       TarInfo objects are returned by TarFile.getmember(),
9160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao       TarFile.getmembers() and TarFile.gettarinfo() and are
9170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao       usually created internally.
9180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """
9190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, name=""):
9210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Construct a TarInfo object. name is the optional name
9220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           of the member.
9230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
9240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.name = name        # member name
9250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.mode = 0644        # file permissions
9260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.uid = 0            # user id
9270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.gid = 0            # group id
9280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.size = 0           # file size
9290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.mtime = 0          # modification time
9300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.chksum = 0         # header checksum
9310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.type = REGTYPE     # member type
9320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.linkname = ""      # link name
9330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.uname = ""         # user name
9340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.gname = ""         # group name
9350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.devmajor = 0       # device major number
9360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.devminor = 0       # device minor number
9370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.offset = 0         # the tar header starts here
9390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.offset_data = 0    # the file's data starts here
9400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.pax_headers = {}   # pax header information
9420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    # In pax headers the "name" and "linkname" fields are called
    # "path" and "linkpath".
    def _getpath(self):
        """Getter of the path property: return self.name."""
        return self.name
    def _setpath(self, name):
        """Setter of the path property: store name in self.name."""
        self.name = name
    # Read/write alias for the name attribute.
    path = property(_getpath, _setpath)
9500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    def _getlinkpath(self):
        """Getter of the linkpath property: return self.linkname."""
        return self.linkname
    def _setlinkpath(self, linkname):
        """Setter of the linkpath property: store linkname in
           self.linkname.
        """
        self.linkname = linkname
    # Read/write alias for the linkname attribute.
    linkpath = property(_getlinkpath, _setlinkpath)
9560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __repr__(self):
9580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
9590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def get_info(self, encoding, errors):
9610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the TarInfo's attributes as a dictionary.
9620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
9630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info = {
9640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "name":     self.name,
9650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "mode":     self.mode & 07777,
9660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "uid":      self.uid,
9670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "gid":      self.gid,
9680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "size":     self.size,
9690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "mtime":    self.mtime,
9700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "chksum":   self.chksum,
9710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "type":     self.type,
9720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "linkname": self.linkname,
9730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "uname":    self.uname,
9740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "gname":    self.gname,
9750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "devmajor": self.devmajor,
9760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "devminor": self.devminor
9770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        }
9780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
9800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            info["name"] += "/"
9810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for key in ("name", "linkname", "uname", "gname"):
9830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if type(info[key]) is unicode:
9840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                info[key] = info[key].encode(encoding, errors)
9850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return info
9870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
9890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a tar header as a string of 512 byte blocks.
9900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
9910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info = self.get_info(encoding, errors)
9920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
9930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if format == USTAR_FORMAT:
9940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.create_ustar_header(info)
9950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif format == GNU_FORMAT:
9960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.create_gnu_header(info)
9970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif format == PAX_FORMAT:
9980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.create_pax_header(info, encoding, errors)
9990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
10000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("invalid format")
10010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def create_ustar_header(self, info):
10030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the object as a ustar header block.
10040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
10050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["magic"] = POSIX_MAGIC
10060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(info["linkname"]) > LENGTH_LINK:
10080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("linkname is too long")
10090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(info["name"]) > LENGTH_NAME:
10110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            info["prefix"], info["name"] = self._posix_split_name(info["name"])
10120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self._create_header(info, USTAR_FORMAT)
10140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def create_gnu_header(self, info):
10160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the object as a GNU header block sequence.
10170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
10180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["magic"] = GNU_MAGIC
10190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = ""
10210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(info["linkname"]) > LENGTH_LINK:
10220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
10230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(info["name"]) > LENGTH_NAME:
10250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
10260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return buf + self._create_header(info, GNU_FORMAT)
10280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def create_pax_header(self, info, encoding, errors):
10300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the object as a ustar header block. If it cannot be
10310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           represented this way, prepend a pax extended header sequence
10320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           with supplement information.
10330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
10340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["magic"] = POSIX_MAGIC
10350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        pax_headers = self.pax_headers.copy()
10360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Test string fields for values that exceed the field length or cannot
10380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # be represented in ASCII encoding.
10390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for name, hname, length in (
10400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
10410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                ("uname", "uname", 32), ("gname", "gname", 32)):
10420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if hname in pax_headers:
10440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # The pax header has priority.
10450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                continue
10460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            val = info[name].decode(encoding, errors)
10480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Try to encode the string as ASCII.
10500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
10510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                val.encode("ascii")
10520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except UnicodeEncodeError:
10530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pax_headers[hname] = val
10540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                continue
10550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if len(info[name]) > length:
10570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pax_headers[hname] = val
10580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Test number fields for values that exceed the field limit or values
10600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # that like to be stored as float.
10610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
10620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if name in pax_headers:
10630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # The pax header has priority. Avoid overflow.
10640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                info[name] = 0
10650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                continue
10660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            val = info[name]
10680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
10690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pax_headers[name] = unicode(val)
10700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                info[name] = 0
10710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Create a pax extended header if necessary.
10730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if pax_headers:
10740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            buf = self._create_pax_generic_header(pax_headers)
10750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
10760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            buf = ""
10770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return buf + self._create_header(info, USTAR_FORMAT)
10790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
10810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def create_pax_global_header(cls, pax_headers):
10820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the object as a pax global header block sequence.
10830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
10840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
10850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _posix_split_name(self, name):
10870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Split a name longer than 100 chars into a prefix
10880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           and a name part.
10890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
10900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        prefix = name[:LENGTH_PREFIX + 1]
10910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        while prefix and prefix[-1] != "/":
10920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            prefix = prefix[:-1]
10930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name = name[len(prefix):]
10950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        prefix = prefix[:-1]
10960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
10970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not prefix or len(name) > LENGTH_NAME:
10980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("name is too long")
10990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return prefix, name
11000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @staticmethod
11020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _create_header(info, format):
11030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a header block. info is a dictionary with file
11040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           information, format must be one of the *_FORMAT constants.
11050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
11060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        parts = [
11070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            stn(info.get("name", ""), 100),
11080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            itn(info.get("mode", 0) & 07777, 8, format),
11090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            itn(info.get("uid", 0), 8, format),
11100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            itn(info.get("gid", 0), 8, format),
11110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            itn(info.get("size", 0), 12, format),
11120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            itn(info.get("mtime", 0), 12, format),
11130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "        ", # checksum field
11140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            info.get("type", REGTYPE),
11150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            stn(info.get("linkname", ""), 100),
11160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            stn(info.get("magic", POSIX_MAGIC), 8),
11170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            stn(info.get("uname", ""), 32),
11180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            stn(info.get("gname", ""), 32),
11190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            itn(info.get("devmajor", 0), 8, format),
11200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            itn(info.get("devminor", 0), 8, format),
11210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            stn(info.get("prefix", ""), 155)
11220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        ]
11230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
11250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
11260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
11270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return buf
11280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @staticmethod
11300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _create_payload(payload):
11310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the string payload filled with zero bytes
11320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           up to the next 512 byte border.
11330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
11340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        blocks, remainder = divmod(len(payload), BLOCKSIZE)
11350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if remainder > 0:
11360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            payload += (BLOCKSIZE - remainder) * NUL
11370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return payload
11380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
11400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _create_gnu_long_header(cls, name, type):
11410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
11420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           for name.
11430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
11440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        name += NUL
11450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info = {}
11470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["name"] = "././@LongLink"
11480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["type"] = type
11490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["size"] = len(name)
11500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["magic"] = GNU_MAGIC
11510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # create extended header + name blocks.
11530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return cls._create_header(info, USTAR_FORMAT) + \
11540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                cls._create_payload(name)
11550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
11570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
11580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a POSIX.1-2001 extended or global header sequence
11590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           that contains a list of keyword, value pairs. The values
11600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           must be unicode objects.
11610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
11620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        records = []
11630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for keyword, value in pax_headers.iteritems():
11640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            keyword = keyword.encode("utf8")
11650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            value = value.encode("utf8")
11660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
11670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            n = p = 0
11680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            while True:
11690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                n = l + len(str(p))
11700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if n == p:
11710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    break
11720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                p = n
11730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            records.append("%d %s=%s\n" % (p, keyword, value))
11740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        records = "".join(records)
11750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # We use a hardcoded "././@PaxHeader" name like star does
11770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # instead of the one that POSIX recommends.
11780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info = {}
11790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["name"] = "././@PaxHeader"
11800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["type"] = type
11810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["size"] = len(records)
11820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        info["magic"] = POSIX_MAGIC
11830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Create pax header + record blocks.
11850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return cls._create_header(info, USTAR_FORMAT) + \
11860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                cls._create_payload(records)
11870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
11890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def frombuf(cls, buf):
11900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Construct a TarInfo object from a 512 byte string buffer.
11910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
11920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(buf) == 0:
11930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise EmptyHeaderError("empty header")
11940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(buf) != BLOCKSIZE:
11950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise TruncatedHeaderError("truncated header")
11960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if buf.count(NUL) == BLOCKSIZE:
11970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise EOFHeaderError("end of file header")
11980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
11990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        chksum = nti(buf[148:156])
12000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if chksum not in calc_chksums(buf):
12010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise InvalidHeaderError("bad checksum")
12020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj = cls()
12040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.buf = buf
12050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.name = nts(buf[0:100])
12060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.mode = nti(buf[100:108])
12070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.uid = nti(buf[108:116])
12080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.gid = nti(buf[116:124])
12090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.size = nti(buf[124:136])
12100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.mtime = nti(buf[136:148])
12110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.chksum = chksum
12120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.type = buf[156:157]
12130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.linkname = nts(buf[157:257])
12140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.uname = nts(buf[265:297])
12150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.gname = nts(buf[297:329])
12160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.devmajor = nti(buf[329:337])
12170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.devminor = nti(buf[337:345])
12180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        prefix = nts(buf[345:500])
12190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Old V7 tar format represents a directory as a regular
12210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # file with a trailing slash.
12220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if obj.type == AREGTYPE and obj.name.endswith("/"):
12230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            obj.type = DIRTYPE
12240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Remove redundant slashes from directories.
12260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if obj.isdir():
12270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            obj.name = obj.name.rstrip("/")
12280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Reconstruct a ustar longname.
12300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if prefix and obj.type not in GNU_TYPES:
12310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            obj.name = prefix + "/" + obj.name
12320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return obj
12330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
12350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def fromtarfile(cls, tarfile):
12360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the next TarInfo object from TarFile object
12370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           tarfile.
12380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
12390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = tarfile.fileobj.read(BLOCKSIZE)
12400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj = cls.frombuf(buf)
12410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
12420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return obj._proc_member(tarfile)
12430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #--------------------------------------------------------------------------
12450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # The following are methods that are called depending on the type of a
12460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # member. The entry point is _proc_member() which can be overridden in a
12470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
12480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # implement the following
12490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # operations:
12500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # 1. Set self.offset_data to the position where the data blocks begin,
12510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #    if there is data that follows.
12520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # 2. Set tarfile.offset to the position where the next member's header will
12530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #    begin.
12540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # 3. Return self or another valid TarInfo object.
12550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _proc_member(self, tarfile):
12560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Choose the right processing method depending on
12570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           the type and call it.
12580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
12590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
12600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self._proc_gnulong(tarfile)
12610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif self.type == GNUTYPE_SPARSE:
12620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self._proc_sparse(tarfile)
12630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
12640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self._proc_pax(tarfile)
12650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
12660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self._proc_builtin(tarfile)
12670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _proc_builtin(self, tarfile):
12690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Process a builtin type or an unknown type which
12700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           will be treated as a regular file.
12710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
12720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.offset_data = tarfile.fileobj.tell()
12730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        offset = self.offset_data
12740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.isreg() or self.type not in SUPPORTED_TYPES:
12750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Skip the following data blocks.
12760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            offset += self._block(self.size)
12770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarfile.offset = offset
12780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Patch the TarInfo object with saved global
12800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # header information.
12810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
12820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self
12840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _proc_gnulong(self, tarfile):
12860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Process the blocks that hold a GNU longname
12870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           or longlink member.
12880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
12890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = tarfile.fileobj.read(self._block(self.size))
12900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Fetch the next header and process it.
12920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
12930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            next = self.fromtarfile(tarfile)
12940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except HeaderError:
12950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise SubsequentHeaderError("missing or bad subsequent header")
12960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
12970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Patch the TarInfo object from the next header with
12980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # the longname information.
12990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        next.offset = self.offset
13000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.type == GNUTYPE_LONGNAME:
13010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            next.name = nts(buf)
13020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif self.type == GNUTYPE_LONGLINK:
13030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            next.linkname = nts(buf)
13040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return next
13060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _proc_sparse(self, tarfile):
13080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Process a GNU sparse header plus extra headers.
13090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
13100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = self.buf
13110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        sp = _ringbuffer()
13120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        pos = 386
13130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        lastpos = 0L
13140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        realpos = 0L
13150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # There are 4 possible sparse structs in the
13160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # first header.
13170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for i in xrange(4):
13180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
13190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                offset = nti(buf[pos:pos + 12])
13200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                numbytes = nti(buf[pos + 12:pos + 24])
13210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except ValueError:
13220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                break
13230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if offset > lastpos:
13240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                sp.append(_hole(lastpos, offset - lastpos))
13250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            sp.append(_data(offset, numbytes, realpos))
13260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            realpos += numbytes
13270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            lastpos = offset + numbytes
13280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pos += 24
13290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        isextended = ord(buf[482])
13310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        origsize = nti(buf[483:495])
13320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # If the isextended flag is given,
13340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # there are extra headers to process.
13350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        while isextended == 1:
13360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            buf = tarfile.fileobj.read(BLOCKSIZE)
13370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pos = 0
13380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for i in xrange(21):
13390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
13400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    offset = nti(buf[pos:pos + 12])
13410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    numbytes = nti(buf[pos + 12:pos + 24])
13420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except ValueError:
13430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    break
13440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if offset > lastpos:
13450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    sp.append(_hole(lastpos, offset - lastpos))
13460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                sp.append(_data(offset, numbytes, realpos))
13470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                realpos += numbytes
13480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                lastpos = offset + numbytes
13490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pos += 24
13500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            isextended = ord(buf[504])
13510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if lastpos < origsize:
13530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            sp.append(_hole(lastpos, origsize - lastpos))
13540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.sparse = sp
13560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.offset_data = tarfile.fileobj.tell()
13580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarfile.offset = self.offset_data + self._block(self.size)
13590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.size = origsize
13600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self
13620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _proc_pax(self, tarfile):
13640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Process an extended or global header as described in
13650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           POSIX.1-2001.
13660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
13670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Read the header information.
13680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = tarfile.fileobj.read(self._block(self.size))
13690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # A pax header stores supplemental information for either
13710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # the following file (extended) or all following files
13720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # (global).
13730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.type == XGLTYPE:
13740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pax_headers = tarfile.pax_headers
13750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
13760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pax_headers = tarfile.pax_headers.copy()
13770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Parse pax header information. A record looks like that:
13790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # "%d %s=%s\n" % (length, keyword, value). length is the size
13800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # of the complete record including the length field itself and
13810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # the newline. keyword and value are both UTF-8 encoded strings.
13820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
13830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        pos = 0
13840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        while True:
13850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            match = regex.match(buf, pos)
13860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not match:
13870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                break
13880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            length, keyword = match.groups()
13900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            length = int(length)
13910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            value = buf[match.end(2) + 1:match.start(1) + length - 1]
13920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            keyword = keyword.decode("utf8")
13940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            value = value.decode("utf8")
13950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pax_headers[keyword] = value
13970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pos += length
13980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
13990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Fetch the next header.
14000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
14010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            next = self.fromtarfile(tarfile)
14020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except HeaderError:
14030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise SubsequentHeaderError("missing or bad subsequent header")
14040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
14060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Patch the TarInfo object with the extended header info.
14070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
14080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            next.offset = self.offset
14090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if "size" in pax_headers:
14110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # If the extended header replaces the size field,
14120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # we need to recalculate the offset where the next
14130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # header starts.
14140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                offset = next.offset_data
14150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if next.isreg() or next.type not in SUPPORTED_TYPES:
14160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    offset += next._block(next.size)
14170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarfile.offset = offset
14180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return next
14200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _apply_pax_info(self, pax_headers, encoding, errors):
14220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Replace fields with supplemental information from a previous
14230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           pax extended or global header.
14240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
14250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for keyword, value in pax_headers.iteritems():
14260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if keyword not in PAX_FIELDS:
14270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                continue
14280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if keyword == "path":
14300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                value = value.rstrip("/")
14310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if keyword in PAX_NUMBER_FIELDS:
14330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
14340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    value = PAX_NUMBER_FIELDS[keyword](value)
14350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except ValueError:
14360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    value = 0
14370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
14380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                value = uts(value, encoding, errors)
14390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            setattr(self, keyword, value)
14410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.pax_headers = pax_headers.copy()
14430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _block(self, count):
14450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Round up a byte count by BLOCKSIZE and return it,
14460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           e.g. _block(834) => 1024.
14470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
14480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        blocks, remainder = divmod(count, BLOCKSIZE)
14490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if remainder:
14500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            blocks += 1
14510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return blocks * BLOCKSIZE
14520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Return the result of isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member's type is DIRTYPE (a directory)."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member's type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member's type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member's type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member's type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member's type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member's type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member's type is CHRTYPE, BLKTYPE or
           FIFOTYPE.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
14730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# class TarInfo
14740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14750a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoclass TarFile(object):
14760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """The TarFile Class provides an interface to tar archives.
14770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    """
14780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
14800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    dereference = False         # If true, add content of linked file to the
14820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                # tar file, else the link.
14830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ignore_zeros = False        # If true, skips empty or invalid blocks and
14850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                # continues processing.
14860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    errorlevel = 1              # If 0, fatal errors only appear in debug
14880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                # messages (if debug >= 0). If > 0, errors
14890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                # are passed to the caller as exceptions.
14900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    format = DEFAULT_FORMAT     # The format to use when creating an archive.
14920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    encoding = ENCODING         # Encoding for 8-bit character strings.
14940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    errors = None               # Error handler for unicode conversion.
14960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    tarinfo = TarInfo           # The default TarInfo class to use.
14980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
14990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    fileobject = ExFileObject   # The default ExFileObject class to use.
15000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __init__(self, name=None, mode="r", fileobj=None, format=None,
15020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
15030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            errors=None, pax_headers=None, debug=None, errorlevel=None):
15040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
15050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           read from an existing archive, 'a' to append data to an existing
15060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           file or 'w' to create a new file overwriting an existing one. `mode'
15070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           defaults to 'r'.
15080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           If `fileobj' is given, it is used for reading or writing data. If it
15090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           can be determined, `mode' is overridden by `fileobj's mode.
15100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           `fileobj' is not closed, when TarFile is closed.
15110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
15120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(mode) > 1 or mode not in "raw":
15130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("mode must be 'r', 'a' or 'w'")
15140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.mode = mode
15150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
15160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not fileobj:
15180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self.mode == "a" and not os.path.exists(name):
15190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # Create nonexistent files in append mode.
15200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.mode = "w"
15210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._mode = "wb"
15220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fileobj = bltn_open(name, self._mode)
15230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._extfileobj = False
15240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
15250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if name is None and hasattr(fileobj, "name"):
15260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                name = fileobj.name
15270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if hasattr(fileobj, "mode"):
15280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._mode = fileobj.mode
15290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._extfileobj = True
15300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.name = os.path.abspath(name) if name else None
15310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.fileobj = fileobj
15320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Init attributes.
15340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if format is not None:
15350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.format = format
15360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo is not None:
15370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.tarinfo = tarinfo
15380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if dereference is not None:
15390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.dereference = dereference
15400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if ignore_zeros is not None:
15410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.ignore_zeros = ignore_zeros
15420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if encoding is not None:
15430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.encoding = encoding
15440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if errors is not None:
15460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.errors = errors
15470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif mode == "r":
15480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.errors = "utf-8"
15490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
15500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.errors = "strict"
15510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if pax_headers is not None and self.format == PAX_FORMAT:
15530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.pax_headers = pax_headers
15540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
15550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.pax_headers = {}
15560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if debug is not None:
15580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.debug = debug
15590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if errorlevel is not None:
15600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.errorlevel = errorlevel
15610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Init datastructures.
15630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.closed = False
15640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.members = []       # list of members as TarInfo objects
15650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._loaded = False    # flag if all members have been read
15660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.offset = self.fileobj.tell()
15670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                # current position in the archive file
15680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.inodes = {}        # dictionary caching the inodes of
15690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                # archive members already added
15700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
15720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self.mode == "r":
15730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.firstmember = None
15740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.firstmember = self.next()
15750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self.mode == "a":
15770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # Move to the end of the archive,
15780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # before the first empty block.
15790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                while True:
15800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.fileobj.seek(self.offset)
15810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    try:
15820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        tarinfo = self.tarinfo.fromtarfile(self)
15830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        self.members.append(tarinfo)
15840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    except EOFHeaderError:
15850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        self.fileobj.seek(self.offset)
15860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        break
15870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    except HeaderError, e:
15880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        raise ReadError(str(e))
15890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self.mode in "aw":
15910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._loaded = True
15920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
15930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self.pax_headers:
15940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
15950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.fileobj.write(buf)
15960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.offset += len(buf)
15970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except:
15980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not self._extfileobj:
15990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.fileobj.close()
16000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.closed = True
16010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise
16020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _getposix(self):
16040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.format == USTAR_FORMAT
16050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _setposix(self, value):
16060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import warnings
16070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        warnings.warn("use the format attribute instead", DeprecationWarning,
16080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                      2)
16090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if value:
16100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.format = USTAR_FORMAT
16110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
16120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.format = GNU_FORMAT
16130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    posix = property(_getposix, _setposix)
16140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #--------------------------------------------------------------------------
16160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Below are the classmethods which act as alternate constructors to the
16170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # TarFile class. The open() method is the only one that is needed for
16180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # public use; it is the "super"-constructor and is able to select an
16190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # adequate "sub"-constructor for a particular compression using the mapping
16200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # from OPEN_METH.
16210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #
16220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # This concept allows one to subclass TarFile without losing the comfort of
16230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # the super-constructor. A sub-constructor is registered and made available
16240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # by adding it to the mapping in OPEN_METH.
16250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
16270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
16280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Open a tar archive for reading, writing or appending. Return
16290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           an appropriate TarFile class.
16300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           mode:
16320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r' or 'r:*' open for reading with transparent compression
16330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r:'         open for reading exclusively uncompressed
16340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r:gz'       open for reading with gzip compression
16350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r:bz2'      open for reading with bzip2 compression
16360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'a' or 'a:'  open for appending, creating the file if necessary
16370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'w' or 'w:'  open for writing without compression
16380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'w:gz'       open for writing with gzip compression
16390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'w:bz2'      open for writing with bzip2 compression
16400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r|*'        open a stream of tar blocks with transparent compression
16420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r|'         open an uncompressed stream of tar blocks for reading
16430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r|gz'       open a gzip compressed stream of tar blocks
16440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'r|bz2'      open a bzip2 compressed stream of tar blocks
16450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'w|'         open an uncompressed stream for writing
16460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'w|gz'       open a gzip compressed stream for writing
16470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'w|bz2'      open a bzip2 compressed stream for writing
16480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
16490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not name and not fileobj:
16510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("nothing to open")
16520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if mode in ("r", "r:*"):
16540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Find out which *open() is appropriate for opening the file.
16550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for comptype in cls.OPEN_METH:
16560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                func = getattr(cls, cls.OPEN_METH[comptype])
16570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if fileobj is not None:
16580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    saved_pos = fileobj.tell()
16590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                try:
16600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return func(name, "r", fileobj, **kwargs)
16610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                except (ReadError, CompressionError), e:
16620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if fileobj is not None:
16630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        fileobj.seek(saved_pos)
16640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    continue
16650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ReadError("file could not be opened successfully")
16660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif ":" in mode:
16680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            filemode, comptype = mode.split(":", 1)
16690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            filemode = filemode or "r"
16700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            comptype = comptype or "tar"
16710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Select the *open() function according to
16730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # given compression.
16740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if comptype in cls.OPEN_METH:
16750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                func = getattr(cls, cls.OPEN_METH[comptype])
16760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
16770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise CompressionError("unknown compression type %r" % comptype)
16780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return func(name, filemode, fileobj, **kwargs)
16790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif "|" in mode:
16810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            filemode, comptype = mode.split("|", 1)
16820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            filemode = filemode or "r"
16830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            comptype = comptype or "tar"
16840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if filemode not in "rw":
16860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ValueError("mode must be 'r' or 'w'")
16870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            t = cls(name, filemode,
16890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    _Stream(name, filemode, comptype, fileobj, bufsize),
16900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    **kwargs)
16910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            t._extfileobj = False
16920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return t
16930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif mode in "aw":
16950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return cls.taropen(name, mode, fileobj, **kwargs)
16960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise ValueError("undiscernible mode")
16980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
16990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
17000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
17010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Open uncompressed tar archive name for reading or writing.
17020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
17030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(mode) > 1 or mode not in "raw":
17040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("mode must be 'r', 'a' or 'w'")
17050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return cls(name, mode, fileobj, **kwargs)
17060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
17080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
17090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Open gzip compressed tar archive name for reading or writing.
17100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           Appending is not allowed.
17110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
17120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(mode) > 1 or mode not in "rw":
17130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("mode must be 'r' or 'w'")
17140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
17160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import gzip
17170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            gzip.GzipFile
17180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except (ImportError, AttributeError):
17190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise CompressionError("gzip module is not available")
17200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if fileobj is None:
17220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fileobj = bltn_open(name, mode + "b")
17230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
17250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            t = cls.taropen(name, mode,
17260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                gzip.GzipFile(name, mode, compresslevel, fileobj),
17270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                **kwargs)
17280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except IOError:
17290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ReadError("not a gzip file")
17300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        t._extfileobj = False
17310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return t
17320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    @classmethod
17340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
17350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Open bzip2 compressed tar archive name for reading or writing.
17360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           Appending is not allowed.
17370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
17380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(mode) > 1 or mode not in "rw":
17390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError("mode must be 'r' or 'w'.")
17400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
17420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import bz2
17430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ImportError:
17440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise CompressionError("bz2 module is not available")
17450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if fileobj is not None:
17470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fileobj = _BZ2Proxy(fileobj, mode)
17480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
17490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
17500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
17520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            t = cls.taropen(name, mode, fileobj, **kwargs)
17530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except (IOError, EOFError):
17540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ReadError("not a bzip2 file")
17550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        t._extfileobj = False
17560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return t
17570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
    # All *open() methods are registered here. The keys are the compression
    # names that open() accepts after the ":" of a mode string, the values
    # are the names of the classmethods it looks up with getattr(). open()
    # also iterates over this mapping when it has to detect the
    # compression of an archive by trial.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
17640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #--------------------------------------------------------------------------
17660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # The public methods which TarFile provides:
17670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def close(self):
17690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Close the TarFile. In write-mode, two finishing zero blocks are
17700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           appended to the archive.
17710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
17720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.closed:
17730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
17740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.mode in "aw":
17760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.fileobj.write(NUL * (BLOCKSIZE * 2))
17770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.offset += (BLOCKSIZE * 2)
17780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # fill up the end with zero-blocks
17790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # (like option -b20 for tar does)
17800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            blocks, remainder = divmod(self.offset, RECORDSIZE)
17810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if remainder > 0:
17820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.fileobj.write(NUL * (RECORDSIZE - remainder))
17830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not self._extfileobj:
17850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.fileobj.close()
17860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.closed = True
17870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getmember(self, name):
17890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return a TarInfo object for member `name'. If `name' can not be
17900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           found in the archive, KeyError is raised. If a member occurs more
17910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           than once in the archive, its last occurrence is assumed to be the
17920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           most up-to-date version.
17930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
17940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo = self._getmember(name)
17950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo is None:
17960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise KeyError("filename %r not found" % name)
17970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return tarinfo
17980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
17990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getmembers(self):
18000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the members of the archive as a list of TarInfo objects. The
18010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           list has the same order as the members in the archive.
18020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
18030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check()
18040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not self._loaded:    # if we want to obtain a list of
18050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._load()        # all members, we first have to
18060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                # scan the whole archive.
18070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self.members
18080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def getnames(self):
18100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the members of the archive as a list of their names. It has
18110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           the same order as the list returned by getmembers().
18120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
18130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return [tarinfo.name for tarinfo in self.getmembers()]
18140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def gettarinfo(self, name=None, arcname=None, fileobj=None):
18160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Create a TarInfo object for either the file `name' or the file
18170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           object `fileobj' (using os.fstat on its file descriptor). You can
18180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           modify some of the TarInfo's attributes before you add it using
18190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           addfile(). If given, `arcname' specifies an alternative name for the
18200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           file in the archive.
18210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
18220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check("aw")
18230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # When fileobj is given, replace name by
18250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # fileobj's real name.
18260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if fileobj is not None:
18270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            name = fileobj.name
18280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Building the name of the member in the archive.
18300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Backward slashes are converted to forward slashes,
18310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Absolute paths are turned to relative paths.
18320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if arcname is None:
18330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            arcname = name
18340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        drv, arcname = os.path.splitdrive(arcname)
18350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        arcname = arcname.replace(os.sep, "/")
18360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        arcname = arcname.lstrip("/")
18370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Now, fill the TarInfo object with
18390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # information specific for the file.
18400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo = self.tarinfo()
18410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.tarfile = self
18420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Use os.stat or os.lstat, depending on platform
18440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # and if symlinks shall be resolved.
18450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if fileobj is None:
18460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if hasattr(os, "lstat") and not self.dereference:
18470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                statres = os.lstat(name)
18480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
18490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                statres = os.stat(name)
18500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
18510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            statres = os.fstat(fileobj.fileno())
18520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        linkname = ""
18530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        stmd = statres.st_mode
18550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if stat.S_ISREG(stmd):
18560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            inode = (statres.st_ino, statres.st_dev)
18570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not self.dereference and statres.st_nlink > 1 and \
18580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    inode in self.inodes and arcname != self.inodes[inode]:
18590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # Is it a hardlink to an already
18600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # archived file?
18610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                type = LNKTYPE
18620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                linkname = self.inodes[inode]
18630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
18640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # The inode is added only if its valid.
18650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # For win32 it is always 0.
18660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                type = REGTYPE
18670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if inode[0]:
18680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.inodes[inode] = arcname
18690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif stat.S_ISDIR(stmd):
18700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            type = DIRTYPE
18710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif stat.S_ISFIFO(stmd):
18720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            type = FIFOTYPE
18730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif stat.S_ISLNK(stmd):
18740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            type = SYMTYPE
18750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            linkname = os.readlink(name)
18760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif stat.S_ISCHR(stmd):
18770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            type = CHRTYPE
18780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif stat.S_ISBLK(stmd):
18790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            type = BLKTYPE
18800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
18810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return None
18820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
18830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Fill the TarInfo object with all
18840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # information we can get.
18850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.name = arcname
18860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.mode = stmd
18870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.uid = statres.st_uid
18880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.gid = statres.st_gid
18890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if type == REGTYPE:
18900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo.size = statres.st_size
18910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
18920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo.size = 0L
18930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.mtime = statres.st_mtime
18940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.type = type
18950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo.linkname = linkname
18960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if pwd:
18970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
18980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
18990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except KeyError:
19000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pass
19010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if grp:
19020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
19030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
19040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except KeyError:
19050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                pass
19060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if type in (CHRTYPE, BLKTYPE):
19080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if hasattr(os, "major") and hasattr(os, "minor"):
19090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarinfo.devmajor = os.major(statres.st_rdev)
19100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarinfo.devminor = os.minor(statres.st_rdev)
19110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return tarinfo
19120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def list(self, verbose=True):
19140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Print a table of contents to sys.stdout. If `verbose' is False, only
19150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           the names of the members are printed. If it is True, an `ls -l'-like
19160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           output is produced.
19170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
19180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check()
19190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for tarinfo in self:
19210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if verbose:
19220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                print filemode(tarinfo.mode),
19230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
19240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                 tarinfo.gname or tarinfo.gid),
19250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if tarinfo.ischr() or tarinfo.isblk():
19260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    print "%10s" % ("%d,%d" \
19270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                                    % (tarinfo.devmajor, tarinfo.devminor)),
19280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
19290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    print "%10d" % tarinfo.size,
19300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                print "%d-%02d-%02d %02d:%02d:%02d" \
19310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                      % time.localtime(tarinfo.mtime)[:6],
19320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            print tarinfo.name + ("/" if tarinfo.isdir() else ""),
19340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if verbose:
19360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if tarinfo.issym():
19370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    print "->", tarinfo.linkname,
19380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if tarinfo.islnk():
19390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    print "link to", tarinfo.linkname,
19400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            print
19410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
19430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Add the file `name' to the archive. `name' may be any type of file
19440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           (directory, fifo, symbolic link, etc.). If given, `arcname'
19450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           specifies an alternative name for the file in the archive.
19460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           Directories are added recursively by default. This can be avoided by
19470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           setting `recursive' to False. `exclude' is a function that should
19480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           return True for each filename to be excluded. `filter' is a function
19490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           that expects a TarInfo object argument and returns the changed
19500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TarInfo object, if it returns None the TarInfo object will be
19510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           excluded from the archive.
19520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
19530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check("aw")
19540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if arcname is None:
19560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            arcname = name
19570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Exclude pathnames.
19590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if exclude is not None:
19600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            import warnings
19610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            warnings.warn("use the filter argument instead",
19620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    DeprecationWarning, 2)
19630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if exclude(name):
19640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._dbg(2, "tarfile: Excluded %r" % name)
19650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
19660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Skip if somebody tries to archive the archive...
19680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.name is not None and os.path.abspath(name) == self.name:
19690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._dbg(2, "tarfile: Skipped %r" % name)
19700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
19710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._dbg(1, name)
19730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Create a TarInfo object from the file.
19750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo = self.gettarinfo(name, arcname)
19760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo is None:
19780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._dbg(1, "tarfile: Unsupported type %r" % name)
19790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
19800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Change or exclude the TarInfo object.
19820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if filter is not None:
19830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo = filter(tarinfo)
19840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if tarinfo is None:
19850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._dbg(2, "tarfile: Excluded %r" % name)
19860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return
19870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Append the tar header and data to the archive.
19890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo.isreg():
19900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            with bltn_open(name, "rb") as f:
19910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.addfile(tarinfo, f)
19920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
19930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.isdir():
19940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.addfile(tarinfo)
19950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if recursive:
19960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                for f in os.listdir(name):
19970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.add(os.path.join(name, f), os.path.join(arcname, f),
19980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            recursive, exclude, filter)
19990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
20010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.addfile(tarinfo)
20020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def addfile(self, tarinfo, fileobj=None):
20040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
20050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           given, tarinfo.size bytes are read from it and added to the archive.
20060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           You can create TarInfo objects using gettarinfo().
20070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           On Windows platforms, `fileobj' should always be opened with mode
20080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           'rb' to avoid irritation about the file size.
20090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
20100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check("aw")
20110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo = copy.copy(tarinfo)
20130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
20150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.fileobj.write(buf)
20160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.offset += len(buf)
20170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # If there's data to follow, append it.
20190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if fileobj is not None:
20200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            copyfileobj(fileobj, self.fileobj, tarinfo.size)
20210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
20220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if remainder > 0:
20230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
20240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                blocks += 1
20250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.offset += blocks * BLOCKSIZE
20260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.members.append(tarinfo)
20280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def extractall(self, path=".", members=None):
20300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Extract all members from the archive to the current working
20310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           directory and set owner, modification time and permissions on
20320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           directories afterwards. `path' specifies a different directory
20330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           to extract to. `members' is optional and must be a subset of the
20340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           list returned by getmembers().
20350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
20360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        directories = []
20370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if members is None:
20390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            members = self
20400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for tarinfo in members:
20420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if tarinfo.isdir():
20430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # Extract directories with a safe mode.
20440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                directories.append(tarinfo)
20450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarinfo = copy.copy(tarinfo)
20460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarinfo.mode = 0700
20470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.extract(tarinfo, path)
20480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Reverse sort directories.
20500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        directories.sort(key=operator.attrgetter('name'))
20510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        directories.reverse()
20520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Set correct owner, mtime and filemode on directories.
20540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for tarinfo in directories:
20550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            dirpath = os.path.join(path, tarinfo.name)
20560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
20570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.chown(tarinfo, dirpath)
20580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.utime(tarinfo, dirpath)
20590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.chmod(tarinfo, dirpath)
20600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except ExtractError, e:
20610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self.errorlevel > 1:
20620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    raise
20630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
20640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._dbg(1, "tarfile: %s" % e)
20650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def extract(self, member, path=""):
20670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Extract a member from the archive to the current working directory,
20680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           using its full name. Its file information is extracted as accurately
20690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           as possible. `member' may be a filename or a TarInfo object. You can
20700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           specify a different directory using `path'.
20710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
20720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check("r")
20730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if isinstance(member, basestring):
20750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo = self.getmember(member)
20760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
20770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo = member
20780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Prepare the link target for makelink().
20800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo.islnk():
20810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
20820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
20840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
20850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except EnvironmentError, e:
20860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self.errorlevel > 0:
20870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise
20880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
20890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if e.filename is None:
20900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._dbg(1, "tarfile: %s" % e.strerror)
20910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
20920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
20930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except ExtractError, e:
20940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if self.errorlevel > 1:
20950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise
20960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
20970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._dbg(1, "tarfile: %s" % e)
20980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
20990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def extractfile(self, member):
21000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Extract a member from the archive as a file object. `member' may be
21010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           a filename or a TarInfo object. If `member' is a regular file, a
21020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           file-like object is returned. If `member' is a link, a file-like
21030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           object is constructed from the link's target. If `member' is none of
21040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           the above, None is returned.
21050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           The file-like object is read-only and provides the following
21060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           methods: read(), readline(), readlines(), seek() and tell()
21070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
21080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check("r")
21090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if isinstance(member, basestring):
21110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo = self.getmember(member)
21120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
21130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo = member
21140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo.isreg():
21160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.fileobject(self, tarinfo)
21170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.type not in SUPPORTED_TYPES:
21190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # If a member's type is unknown, it is treated as a
21200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # regular file.
21210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return self.fileobject(self, tarinfo)
21220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.islnk() or tarinfo.issym():
21240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if isinstance(self.fileobj, _Stream):
21250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # A small but ugly workaround for the case that someone tries
21260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # to extract a (sym)link as a file-object from a non-seekable
21270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # stream of tar blocks.
21280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise StreamError("cannot extract (sym)link as file object")
21290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
21300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # A (sym)link's file object is its target's file object.
21310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return self.extractfile(self._find_link_target(tarinfo))
21320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
21330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # If there's no data associated with the member (directory, chrdev,
21340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # blkdev, etc.), return None instead of a file object.
21350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return None
21360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _extract_member(self, tarinfo, targetpath):
21380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Extract the TarInfo object tarinfo to a physical
21390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           file called targetpath.
21400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
21410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Fetch the TarInfo object for the given name
21420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # and build the destination pathname, replacing
21430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # forward slashes to platform specific separators.
21440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        targetpath = targetpath.rstrip("/")
21450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        targetpath = targetpath.replace("/", os.sep)
21460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Create all upper directories.
21480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        upperdirs = os.path.dirname(targetpath)
21490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if upperdirs and not os.path.exists(upperdirs):
21500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Create directories that are not part of the archive with
21510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # default permissions.
21520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            os.makedirs(upperdirs)
21530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo.islnk() or tarinfo.issym():
21550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
21560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
21570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._dbg(1, tarinfo.name)
21580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo.isreg():
21600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.makefile(tarinfo, targetpath)
21610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.isdir():
21620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.makedir(tarinfo, targetpath)
21630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.isfifo():
21640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.makefifo(tarinfo, targetpath)
21650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.ischr() or tarinfo.isblk():
21660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.makedev(tarinfo, targetpath)
21670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.islnk() or tarinfo.issym():
21680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.makelink(tarinfo, targetpath)
21690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif tarinfo.type not in SUPPORTED_TYPES:
21700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.makeunknown(tarinfo, targetpath)
21710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
21720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.makefile(tarinfo, targetpath)
21730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.chown(tarinfo, targetpath)
21750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not tarinfo.issym():
21760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.chmod(tarinfo, targetpath)
21770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.utime(tarinfo, targetpath)
21780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #--------------------------------------------------------------------------
21800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Below are the different file methods. They are called via
21810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # _extract_member() when extract() is called. They can be replaced in a
21820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # subclass to implement other functionality.
21830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def makedir(self, tarinfo, targetpath):
21850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Make a directory called targetpath.
21860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
21870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
21880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Use a safe mode for the directory, the real mode is set
21890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # later in _extract_member().
21900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            os.mkdir(targetpath, 0700)
21910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except EnvironmentError, e:
21920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if e.errno != errno.EEXIST:
21930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise
21940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
21950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def makefile(self, tarinfo, targetpath):
21960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Make a file called targetpath.
21970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
21980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        source = self.extractfile(tarinfo)
21990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
22000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            with bltn_open(targetpath, "wb") as target:
22010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                copyfileobj(source, target)
22020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        finally:
22030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            source.close()
22040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def makeunknown(self, tarinfo, targetpath):
22060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Make a file from a TarInfo object with an unknown type
22070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           at targetpath.
22080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
22090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.makefile(tarinfo, targetpath)
22100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._dbg(1, "tarfile: Unknown file type %r, " \
22110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                     "extracted as regular file." % tarinfo.type)
22120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def makefifo(self, tarinfo, targetpath):
22140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Make a fifo called targetpath.
22150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
22160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(os, "mkfifo"):
22170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            os.mkfifo(targetpath)
22180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
22190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ExtractError("fifo not supported by system")
22200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def makedev(self, tarinfo, targetpath):
22220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Make a character or block device called targetpath.
22230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
22240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
22250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ExtractError("special devices not supported by system")
22260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        mode = tarinfo.mode
22280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo.isblk():
22290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            mode |= stat.S_IFBLK
22300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
22310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            mode |= stat.S_IFCHR
22320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        os.mknod(targetpath, mode,
22340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
22350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def makelink(self, tarinfo, targetpath):
22370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Make a (symbolic) link called targetpath. If it cannot be created
22380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao          (platform limitation), we try to make a copy of the referenced file
22390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao          instead of a link.
22400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
22410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(os, "symlink") and hasattr(os, "link"):
22420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # For systems that support symbolic and hard links.
22430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if tarinfo.issym():
22440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if os.path.lexists(targetpath):
22450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    os.unlink(targetpath)
22460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                os.symlink(tarinfo.linkname, targetpath)
22470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
22480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # See extract().
22490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if os.path.exists(tarinfo._link_target):
22500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if os.path.lexists(targetpath):
22510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        os.unlink(targetpath)
22520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    os.link(tarinfo._link_target, targetpath)
22530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
22540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._extract_member(self._find_link_target(tarinfo), targetpath)
22550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
22560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
22570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self._extract_member(self._find_link_target(tarinfo), targetpath)
22580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except KeyError:
22590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ExtractError("unable to resolve link inside archive")
22600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def chown(self, tarinfo, targetpath):
22620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Set owner of targetpath according to tarinfo.
22630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
22640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
22650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # We have to be root to do so.
22660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
22670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                g = grp.getgrnam(tarinfo.gname)[2]
22680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except KeyError:
22690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                g = tarinfo.gid
22700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
22710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                u = pwd.getpwnam(tarinfo.uname)[2]
22720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except KeyError:
22730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                u = tarinfo.uid
22740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
22750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if tarinfo.issym() and hasattr(os, "lchown"):
22760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    os.lchown(targetpath, u, g)
22770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
22780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if sys.platform != "os2emx":
22790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        os.chown(targetpath, u, g)
22800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except EnvironmentError, e:
22810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ExtractError("could not change owner")
22820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def chmod(self, tarinfo, targetpath):
22840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Set file permissions of targetpath according to tarinfo.
22850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
22860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if hasattr(os, 'chmod'):
22870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
22880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                os.chmod(targetpath, tarinfo.mode)
22890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except EnvironmentError, e:
22900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ExtractError("could not change mode")
22910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
22920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def utime(self, tarinfo, targetpath):
22930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Set modification time of targetpath according to tarinfo.
22940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
22950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not hasattr(os, 'utime'):
22960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return
22970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        try:
22980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
22990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        except EnvironmentError, e:
23000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ExtractError("could not change modification time")
23010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #--------------------------------------------------------------------------
23030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def next(self):
23040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Return the next member of the archive as a TarInfo object, when
23050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           TarFile is opened for reading. Return None if there is no more
23060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           available.
23070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
23080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check("ra")
23090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.firstmember is not None:
23100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            m = self.firstmember
23110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.firstmember = None
23120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return m
23130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Read the next block.
23150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self.fileobj.seek(self.offset)
23160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        tarinfo = None
23170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        while True:
23180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            try:
23190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tarinfo = self.tarinfo.fromtarfile(self)
23200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except EOFHeaderError, e:
23210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self.ignore_zeros:
23220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._dbg(2, "0x%X: %s" % (self.offset, e))
23230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.offset += BLOCKSIZE
23240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    continue
23250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except InvalidHeaderError, e:
23260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self.ignore_zeros:
23270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self._dbg(2, "0x%X: %s" % (self.offset, e))
23280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    self.offset += BLOCKSIZE
23290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    continue
23300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                elif self.offset == 0:
23310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    raise ReadError(str(e))
23320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except EmptyHeaderError:
23330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self.offset == 0:
23340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    raise ReadError("empty file")
23350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except TruncatedHeaderError, e:
23360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if self.offset == 0:
23370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    raise ReadError(str(e))
23380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            except SubsequentHeaderError, e:
23390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise ReadError(str(e))
23400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            break
23410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo is not None:
23430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.members.append(tarinfo)
23440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
23450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self._loaded = True
23460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return tarinfo
23480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    #--------------------------------------------------------------------------
23500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Little helper methods:
23510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _getmember(self, name, tarinfo=None, normalize=False):
23530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Find an archive member by name from bottom to top.
23540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           If tarinfo is given, it is used as the starting point.
23550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
23560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Ensure that all members have been loaded.
23570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        members = self.getmembers()
23580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # Limit the member search list up to tarinfo.
23600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo is not None:
23610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            members = members[:members.index(tarinfo)]
23620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if normalize:
23640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            name = os.path.normpath(name)
23650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for member in reversed(members):
23670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if normalize:
23680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                member_name = os.path.normpath(member.name)
23690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
23700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                member_name = member.name
23710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if name == member_name:
23730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return member
23740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _load(self):
23760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Read through the entire archive file and look for readable
23770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           members.
23780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
23790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        while True:
23800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tarinfo = self.next()
23810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if tarinfo is None:
23820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                break
23830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._loaded = True
23840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _check(self, mode=None):
23860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Check if TarFile is still open, and if the operation's mode
23870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           corresponds to TarFile's mode.
23880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
23890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self.closed:
23900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError("%s is closed" % self.__class__.__name__)
23910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if mode is not None and self.mode not in mode:
23920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise IOError("bad operation for mode %r" % self.mode)
23930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
23940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _find_link_target(self, tarinfo):
23950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Find the target member of a symlink or hardlink member in the
23960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao           archive.
23970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
23980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if tarinfo.issym():
23990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Always search the entire archive.
24000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
24010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            limit = None
24020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
24030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # Search the archive before the link, because a hard link is
24040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # just a reference to an already archived file.
24050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            linkname = tarinfo.linkname
24060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            limit = tarinfo
24070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
24080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        member = self._getmember(linkname, tarinfo=limit, normalize=True)
24090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if member is None:
24100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise KeyError("linkname %r not found" % linkname)
24110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return member
24120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
24130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __iter__(self):
24140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Provide an iterator object.
24150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
24160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if self._loaded:
24170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return iter(self.members)
24180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
24190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return TarIter(self)
24200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
24210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def _dbg(self, level, msg):
24220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """Write debugging output to sys.stderr.
24230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        """
24240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if level <= self.debug:
24250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            print >> sys.stderr, msg
24260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
24270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __enter__(self):
24280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        self._check()
24290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return self
24300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
24310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    def __exit__(self, type, value, traceback):
24320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if type is None:
24330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.close()
24340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
24350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # An exception occurred. We must not call close() because
24360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # it would try to write end-of-archive blocks and padding.
24370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if not self._extfileobj:
24380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                self.fileobj.close()
24390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            self.closed = True
24400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# class TarFile
24410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class TarIter:
    """Iterator over the members of a TarFile that is still being read.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Bind the iterator to tarfile and start at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """An iterator is its own iterable.
        """
        return self

    def next(self):
        """Return the member at the current position, reading it via
           TarFile.next() if it is not yet in TarFile.members. Raise
           StopIteration, and flag the TarFile as _loaded, once the
           archive is exhausted.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            tarinfo = archive.next()
        elif self.index < len(archive.members):
            # Already read by someone else; serve it from the list.
            tarinfo = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return tarinfo
24790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
24800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole: a span of size bytes that
       starts at offset.
    """

    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True for offsets in the half-open range [start, start + size).
        start = self.offset
        return start <= offset < start + self.size
24890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class _data(_section):
    """A data section of a sparse file: a _section that additionally
       records a realpos value.
    """

    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
24960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class _hole(_section):
    """A hole section of a sparse file. Adds nothing to _section; the
       class only serves to tell holes apart from data sections.
    """
25010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
class _ringbuffer(list):
    """List of sections that remembers where the last successful
       lookup ended, so that the next lookup starts from there
       instead of from the beginning.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0
    def find(self, offset):
        """Return the first item that contains offset, or None.

           The search starts at the remembered index and wraps around
           the end of the list; it gives up after one full cycle. On
           success the index of the hit is remembered. An empty buffer
           yields None.
        """
        if not self:
            # Nothing to search; avoids indexing into an empty list.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
25220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
25230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------
25240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# zipfile compatible TarFile class
25250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED


class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """

    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzipped (TAR_GZIPPED)
           archive. Any other compression value raises ValueError.
           When opened for reading, every member additionally gets the
           attributes filename, file_size and date_time.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)

        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")

        if mode[:1] == "r":
            # Attach the attribute names used by zipfile's info objects.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members reported by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return None

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname. compress_type is
           accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, taking the member name
           and modification time from zinfo.filename and
           zinfo.date_time.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar

        header = TarInfo(zinfo.filename)
        header.size = len(bytes)
        header.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(header, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
25750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#class TarFileCompat
25760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
25770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------------------
25780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# exported functions
25790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#--------------------
def is_tarfile(name):
    """Return True if name can be opened as a tar archive with this
       module's open(), and False if doing so raises TarError.
    """
    try:
        # Opening is the whole test; the archive is closed right away.
        open(name).close()
    except TarError:
        return False
    return True
25900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
# Keep a reference to the builtin open() under another name, because
# the module-level name "open" is rebound on the next line.
bltn_open = open
# Expose TarFile.open as the module-level open() function.
open = TarFile.open
2593