1#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission  is  hereby granted,  free  of charge,  to  any person
10# obtaining a  copy of  this software  and associated documentation
11# files  (the  "Software"),  to   deal  in  the  Software   without
12# restriction,  including  without limitation  the  rights to  use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies  of  the  Software,  and to  permit  persons  to  whom the
15# Software  is  furnished  to  do  so,  subject  to  the  following
16# conditions:
17#
18# The above copyright  notice and this  permission notice shall  be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
22# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
23# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
24# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
25# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
26# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
# Module metadata.  The "$...$" values look like version-control keyword
# placeholders (presumably expanded on checkout -- confirm before relying
# on their contents).
__version__ = "$Revision: 85213 $"
# $Source$

version     = "0.9.0"           # release number of the tarfile module itself
__author__  = "Lars Gust�bel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gust�bel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52import copy
53import re
54import operator
55
56try:
57    import grp, pwd
58except ImportError:
59    grp = pwd = None
60
# Names exported by "from tarfile import *"; everything else in this
# module has to be imported explicitly.
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
63
64#---------------------------------------------------------
65# tar constants
66#---------------------------------------------------------
NUL = "\0"                      # the null character, used to pad fields
BLOCKSIZE = 512                 # length of processing blocks in bytes
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string ("ustar", NUL, "00")

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one-character type field of a member header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (NUL type flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Archive formats; DEFAULT_FORMAT is the default of itn() and
# TarInfo.tobuf().
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
99
100#---------------------------------------------------------
101# tarfile constants
102#---------------------------------------------------------
# File types that tarfile supports (values of the header type field):
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
# "path" and "linkpath" correspond to the TarInfo properties of the
# same name, which alias "name" and "linkname".
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to the callable
# that converts its value.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
132
133#---------------------------------------------------------
134# Bits used in the mode field, values in octal.
135#---------------------------------------------------------
# File type bits (used by filemode_table to pick the leading character).
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special mode bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (shown as "t"/"T" by filemode())

# Permission bits for owner, group and other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
156
157#---------------------------------------------------------
158# initialization
159#---------------------------------------------------------
# Default encoding argument of TarInfo.tobuf(): the filesystem encoding,
# or the interpreter's default encoding if the former is reported as None.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
163
164#---------------------------------------------------------
165# Some useful functions
166#---------------------------------------------------------
167
def stn(s, length):
    """Return s as a field of exactly length characters.

       Strings longer than length are truncated, shorter ones are
       padded on the right with NUL characters.
    """
    # A negative repeat count yields "", so over-long input gets no padding.
    padding = NUL * (length - len(s))
    return s[:length] + padding
172
def nts(s):
    """Return the part of a null-terminated string field that precedes
       the first null character (the whole field if there is none).
    """
    head, _sep, _rest = s.partition("\0")
    return head
181
182def nti(s):
183    """Convert a number field to a python number.
184    """
185    # There are two possible encodings for a number field, see
186    # itn() below.
187    if s[0] != chr(0200):
188        try:
189            n = int(nts(s) or "0", 8)
190        except ValueError:
191            raise InvalidHeaderError("invalid header")
192    else:
193        n = 0L
194        for i in xrange(len(s) - 1):
195            n <<= 8
196            n += ord(s[i + 1])
197    return n
198
199def itn(n, digits=8, format=DEFAULT_FORMAT):
200    """Convert a python number to a number field.
201    """
202    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
203    # octal digits followed by a null-byte, this allows values up to
204    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
205    # that if necessary. A leading 0200 byte indicates this particular
206    # encoding, the following digits-1 bytes are a big-endian
207    # representation. This allows values up to (256**(digits-1))-1.
208    if 0 <= n < 8 ** (digits - 1):
209        s = "%0*o" % (digits - 1, n) + NUL
210    else:
211        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
212            raise ValueError("overflow in number field")
213
214        if n < 0:
215            # XXX We mimic GNU tar's behaviour with negative numbers,
216            # this could raise OverflowError.
217            n = struct.unpack("L", struct.pack("l", n))[0]
218
219        s = ""
220        for i in xrange(digits - 1):
221            s = chr(n & 0377) + s
222            n >>= 8
223        s = chr(0200) + s
224    return s
225
def uts(s, encoding, errors):
    """Encode the unicode object s with encoding and return the result.

       errors is passed on to the codec, except for the extra value
       "utf-8": it behaves like "strict", but characters that cannot be
       encoded are replaced with their UTF-8 representation (similar to
       the -o invalid=UTF-8 option in POSIX.1-2001).
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # Fall back to encoding character by character.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
245
def calc_chksums(buf):
    """Return the tuple (unsigned_chksum, signed_chksum) for the header
       block in buf.

       All bytes of the first 512 are summed up except for the 8 byte
       chksum field at offset 148, which counts as if it was filled
       with spaces (8 * 32 = 256). According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which gives a different result if bytes with the high bit set
       are present, hence both variants are computed.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
258
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
       Raises IOError if src ends before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    def transfer(count):
        # Move exactly count bytes; a short read means src is exhausted.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    done = 0
    while done < full_chunks:
        transfer(BUFSIZE)
        done += 1

    if tail != 0:
        transfer(tail)
    return
283
# Lookup table for filemode().  It has one entry per output character
# (file type, then read/write/execute for owner, group and other).
# Each entry is a tuple of (bits, char) pairs that are tried in order,
# so the combined masks have to come before the plain ones.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
310
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for column in filemode_table:
        # The first entry whose bits are all set in mode wins,
        # "-" stands for "none of them".
        symbol = "-"
        for mask, flag in column:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
325
class TarError(Exception):
    """Base class of all exceptions defined in this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base class of the exceptions that concern member headers."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
359
360#---------------------------
361# internal stream interface
362#---------------------------
363class _LowLevelFile:
364    """Low-level file object. Supports reading and writing.
365       It is used instead of a regular file object for streaming
366       access.
367    """
368
369    def __init__(self, name, mode):
370        mode = {
371            "r": os.O_RDONLY,
372            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
373        }[mode]
374        if hasattr(os, "O_BINARY"):
375            mode |= os.O_BINARY
376        self.fd = os.open(name, mode, 0666)
377
378    def close(self):
379        os.close(self.fd)
380
381    def read(self, size):
382        return os.read(self.fd, size)
383
384    def write(self, s):
385        os.write(self.fd, s)
386
387class _Stream:
388    """Class that serves as an adapter between TarFile and
389       a stream-like object.  The stream-like object only
390       needs to have a read() or write() method and is accessed
391       blockwise.  Use of gzip or bzip2 compression is possible.
392       A stream-like object could be for example: sys.stdin,
393       sys.stdout, a socket, a tape device etc.
394
395       _Stream is intended to be used only internally.
396    """
397
398    def __init__(self, name, mode, comptype, fileobj, bufsize):
399        """Construct a _Stream object.
400        """
401        self._extfileobj = True
402        if fileobj is None:
403            fileobj = _LowLevelFile(name, mode)
404            self._extfileobj = False
405
406        if comptype == '*':
407            # Enable transparent compression detection for the
408            # stream interface
409            fileobj = _StreamProxy(fileobj)
410            comptype = fileobj.getcomptype()
411
412        self.name     = name or ""
413        self.mode     = mode
414        self.comptype = comptype
415        self.fileobj  = fileobj
416        self.bufsize  = bufsize
417        self.buf      = ""
418        self.pos      = 0L
419        self.closed   = False
420
421        if comptype == "gz":
422            try:
423                import zlib
424            except ImportError:
425                raise CompressionError("zlib module is not available")
426            self.zlib = zlib
427            self.crc = zlib.crc32("") & 0xffffffffL
428            if mode == "r":
429                self._init_read_gz()
430            else:
431                self._init_write_gz()
432
433        if comptype == "bz2":
434            try:
435                import bz2
436            except ImportError:
437                raise CompressionError("bz2 module is not available")
438            if mode == "r":
439                self.dbuf = ""
440                self.cmp = bz2.BZ2Decompressor()
441            else:
442                self.cmp = bz2.BZ2Compressor()
443
444    def __del__(self):
445        if hasattr(self, "closed") and not self.closed:
446            self.close()
447
448    def _init_write_gz(self):
449        """Initialize for writing with gzip compression.
450        """
451        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
452                                            -self.zlib.MAX_WBITS,
453                                            self.zlib.DEF_MEM_LEVEL,
454                                            0)
455        timestamp = struct.pack("<L", long(time.time()))
456        self.__write("\037\213\010\010%s\002\377" % timestamp)
457        if type(self.name) is unicode:
458            self.name = self.name.encode("iso-8859-1", "replace")
459        if self.name.endswith(".gz"):
460            self.name = self.name[:-3]
461        self.__write(self.name + NUL)
462
463    def write(self, s):
464        """Write string s to the stream.
465        """
466        if self.comptype == "gz":
467            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
468        self.pos += len(s)
469        if self.comptype != "tar":
470            s = self.cmp.compress(s)
471        self.__write(s)
472
473    def __write(self, s):
474        """Write string s to the stream if a whole new block
475           is ready to be written.
476        """
477        self.buf += s
478        while len(self.buf) > self.bufsize:
479            self.fileobj.write(self.buf[:self.bufsize])
480            self.buf = self.buf[self.bufsize:]
481
482    def close(self):
483        """Close the _Stream object. No operation should be
484           done on it afterwards.
485        """
486        if self.closed:
487            return
488
489        if self.mode == "w" and self.comptype != "tar":
490            self.buf += self.cmp.flush()
491
492        if self.mode == "w" and self.buf:
493            self.fileobj.write(self.buf)
494            self.buf = ""
495            if self.comptype == "gz":
496                # The native zlib crc is an unsigned 32-bit integer, but
497                # the Python wrapper implicitly casts that to a signed C
498                # long.  So, on a 32-bit box self.crc may "look negative",
499                # while the same crc on a 64-bit box may "look positive".
500                # To avoid irksome warnings from the `struct` module, force
501                # it to look positive on all boxes.
502                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
503                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
504
505        if not self._extfileobj:
506            self.fileobj.close()
507
508        self.closed = True
509
510    def _init_read_gz(self):
511        """Initialize for reading a gzip compressed fileobj.
512        """
513        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
514        self.dbuf = ""
515
516        # taken from gzip.GzipFile with some alterations
517        if self.__read(2) != "\037\213":
518            raise ReadError("not a gzip file")
519        if self.__read(1) != "\010":
520            raise CompressionError("unsupported compression method")
521
522        flag = ord(self.__read(1))
523        self.__read(6)
524
525        if flag & 4:
526            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
527            self.read(xlen)
528        if flag & 8:
529            while True:
530                s = self.__read(1)
531                if not s or s == NUL:
532                    break
533        if flag & 16:
534            while True:
535                s = self.__read(1)
536                if not s or s == NUL:
537                    break
538        if flag & 2:
539            self.__read(2)
540
541    def tell(self):
542        """Return the stream's file pointer position.
543        """
544        return self.pos
545
546    def seek(self, pos=0):
547        """Set the stream's file pointer to pos. Negative seeking
548           is forbidden.
549        """
550        if pos - self.pos >= 0:
551            blocks, remainder = divmod(pos - self.pos, self.bufsize)
552            for i in xrange(blocks):
553                self.read(self.bufsize)
554            self.read(remainder)
555        else:
556            raise StreamError("seeking backwards is not allowed")
557        return self.pos
558
559    def read(self, size=None):
560        """Return the next size number of bytes from the stream.
561           If size is not defined, return all bytes of the stream
562           up to EOF.
563        """
564        if size is None:
565            t = []
566            while True:
567                buf = self._read(self.bufsize)
568                if not buf:
569                    break
570                t.append(buf)
571            buf = "".join(t)
572        else:
573            buf = self._read(size)
574        self.pos += len(buf)
575        return buf
576
577    def _read(self, size):
578        """Return size bytes from the stream.
579        """
580        if self.comptype == "tar":
581            return self.__read(size)
582
583        c = len(self.dbuf)
584        t = [self.dbuf]
585        while c < size:
586            buf = self.__read(self.bufsize)
587            if not buf:
588                break
589            try:
590                buf = self.cmp.decompress(buf)
591            except IOError:
592                raise ReadError("invalid compressed data")
593            t.append(buf)
594            c += len(buf)
595        t = "".join(t)
596        self.dbuf = t[size:]
597        return t[:size]
598
599    def __read(self, size):
600        """Return size bytes from stream. If internal buffer is empty,
601           read another block from the stream.
602        """
603        c = len(self.buf)
604        t = [self.buf]
605        while c < size:
606            buf = self.fileobj.read(self.bufsize)
607            if not buf:
608                break
609            t.append(buf)
610            c += len(buf)
611        t = "".join(t)
612        self.buf = t[size:]
613        return t[:size]
614# class _Stream
615
616class _StreamProxy(object):
617    """Small proxy class that enables transparent compression
618       detection for the Stream interface (mode 'r|*').
619    """
620
621    def __init__(self, fileobj):
622        self.fileobj = fileobj
623        self.buf = self.fileobj.read(BLOCKSIZE)
624
625    def read(self, size):
626        self.read = self.fileobj.read
627        return self.buf
628
629    def getcomptype(self):
630        if self.buf.startswith("\037\213\010"):
631            return "gz"
632        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
633            return "bz2"
634        return "tar"
635
636    def close(self):
637        self.fileobj.close()
638# class StreamProxy
639
640class _BZ2Proxy(object):
641    """Small proxy class that enables external file object
642       support for "r:bz2" and "w:bz2" modes. This is actually
643       a workaround for a limitation in bz2 module's BZ2File
644       class which (unlike gzip.GzipFile) has no support for
645       a file object argument.
646    """
647
648    blocksize = 16 * 1024
649
650    def __init__(self, fileobj, mode):
651        self.fileobj = fileobj
652        self.mode = mode
653        self.name = getattr(self.fileobj, "name", None)
654        self.init()
655
656    def init(self):
657        import bz2
658        self.pos = 0
659        if self.mode == "r":
660            self.bz2obj = bz2.BZ2Decompressor()
661            self.fileobj.seek(0)
662            self.buf = ""
663        else:
664            self.bz2obj = bz2.BZ2Compressor()
665
666    def read(self, size):
667        b = [self.buf]
668        x = len(self.buf)
669        while x < size:
670            raw = self.fileobj.read(self.blocksize)
671            if not raw:
672                break
673            data = self.bz2obj.decompress(raw)
674            b.append(data)
675            x += len(data)
676        self.buf = "".join(b)
677
678        buf = self.buf[:size]
679        self.buf = self.buf[size:]
680        self.pos += len(buf)
681        return buf
682
683    def seek(self, pos):
684        if pos < self.pos:
685            self.init()
686        self.read(pos - self.pos)
687
688    def tell(self):
689        return self.pos
690
691    def write(self, data):
692        self.pos += len(data)
693        raw = self.bz2obj.compress(data)
694        self.fileobj.write(raw)
695
696    def close(self):
697        if self.mode == "w":
698            raw = self.bz2obj.flush()
699            self.fileobj.write(raw)
700# class _BZ2Proxy
701
702#------------------------
703# Extraction file object
704#------------------------
705class _FileInFile(object):
706    """A thin wrapper around an existing file object that
707       provides a part of its data as an individual file
708       object.
709    """
710
711    def __init__(self, fileobj, offset, size, sparse=None):
712        self.fileobj = fileobj
713        self.offset = offset
714        self.size = size
715        self.sparse = sparse
716        self.position = 0
717
718    def tell(self):
719        """Return the current file position.
720        """
721        return self.position
722
723    def seek(self, position):
724        """Seek to a position in the file.
725        """
726        self.position = position
727
728    def read(self, size=None):
729        """Read data from the file.
730        """
731        if size is None:
732            size = self.size - self.position
733        else:
734            size = min(size, self.size - self.position)
735
736        if self.sparse is None:
737            return self.readnormal(size)
738        else:
739            return self.readsparse(size)
740
741    def readnormal(self, size):
742        """Read operation for regular files.
743        """
744        self.fileobj.seek(self.offset + self.position)
745        self.position += size
746        return self.fileobj.read(size)
747
748    def readsparse(self, size):
749        """Read operation for sparse files.
750        """
751        data = []
752        while size > 0:
753            buf = self.readsparsesection(size)
754            if not buf:
755                break
756            size -= len(buf)
757            data.append(buf)
758        return "".join(data)
759
760    def readsparsesection(self, size):
761        """Read a single section of a sparse file.
762        """
763        section = self.sparse.find(self.position)
764
765        if section is None:
766            return ""
767
768        size = min(size, section.offset + section.size - self.position)
769
770        if isinstance(section, _data):
771            realpos = section.realpos + self.position - section.offset
772            self.fileobj.seek(self.offset + realpos)
773            self.position += size
774            return self.fileobj.read(size)
775        else:
776            self.position += size
777            return NUL * size
778#class _FileInFile
779
780
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes that readline() fetches at a time.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member tarinfo, whose data is
           read from tarfile.fileobj.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read ahead but not returned yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "everything", as with regular file
        # objects. It must not be used for slicing or passed on.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            # Serve what readline() has read ahead first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read ahead until a newline or the end of the file shows up.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # None and all negative values mean "no limit".
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           The resulting position is limited to the range from 0 to
           the size of the file. Raises ValueError for an unknown
           whence.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # The read-ahead data does not belong to the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
907#class ExFileObject
908
909#------------------
910# Exported Classes
911#------------------
912class TarInfo(object):
913    """Informational class which holds the details about an
914       archive member given by a tar header block.
915       TarInfo objects are returned by TarFile.getmember(),
916       TarFile.getmembers() and TarFile.gettarinfo() and are
917       usually created internally.
918    """
919
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.

           All other attributes start out with the defaults below
           (a regular file with mode 0644, size 0 and no owner
           information).
        """
        self.name = name        # member name
        self.mode = 0644        # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information
942
    # In pax headers the "name" and "linkname" fields are called
    # "path" and "linkpath". The properties below expose the same
    # attributes under the pax names.
    def _getpath(self):
        """Return the member name (getter of the path property).
        """
        return self.name
    def _setpath(self, name):
        """Store name as the member name (setter of the path property).
        """
        self.name = name
    path = property(_getpath, _setpath)
950
    def _getlinkpath(self):
        """Return the link name (getter of the linkpath property).
        """
        return self.linkname
    def _setlinkpath(self, linkname):
        """Store linkname as the link name (setter of the linkpath
           property).
        """
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)
956
957    def __repr__(self):
958        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
959
960    def get_info(self, encoding, errors):
961        """Return the TarInfo's attributes as a dictionary.
962        """
963        info = {
964            "name":     self.name,
965            "mode":     self.mode & 07777,
966            "uid":      self.uid,
967            "gid":      self.gid,
968            "size":     self.size,
969            "mtime":    self.mtime,
970            "chksum":   self.chksum,
971            "type":     self.type,
972            "linkname": self.linkname,
973            "uname":    self.uname,
974            "gname":    self.gname,
975            "devmajor": self.devmajor,
976            "devminor": self.devminor
977        }
978
979        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
980            info["name"] += "/"
981
982        for key in ("name", "linkname", "uname", "gname"):
983            if type(info[key]) is unicode:
984                info[key] = info[key].encode(encoding, errors)
985
986        return info
987
988    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
989        """Return a tar header as a string of 512 byte blocks.
990        """
991        info = self.get_info(encoding, errors)
992
993        if format == USTAR_FORMAT:
994            return self.create_ustar_header(info)
995        elif format == GNU_FORMAT:
996            return self.create_gnu_header(info)
997        elif format == PAX_FORMAT:
998            return self.create_pax_header(info, encoding, errors)
999        else:
1000            raise ValueError("invalid format")
1001
1002    def create_ustar_header(self, info):
1003        """Return the object as a ustar header block.
1004        """
1005        info["magic"] = POSIX_MAGIC
1006
1007        if len(info["linkname"]) > LENGTH_LINK:
1008            raise ValueError("linkname is too long")
1009
1010        if len(info["name"]) > LENGTH_NAME:
1011            info["prefix"], info["name"] = self._posix_split_name(info["name"])
1012
1013        return self._create_header(info, USTAR_FORMAT)
1014
1015    def create_gnu_header(self, info):
1016        """Return the object as a GNU header block sequence.
1017        """
1018        info["magic"] = GNU_MAGIC
1019
1020        buf = ""
1021        if len(info["linkname"]) > LENGTH_LINK:
1022            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1023
1024        if len(info["name"]) > LENGTH_NAME:
1025            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1026
1027        return buf + self._create_header(info, GNU_FORMAT)
1028
1029    def create_pax_header(self, info, encoding, errors):
1030        """Return the object as a ustar header block. If it cannot be
1031           represented this way, prepend a pax extended header sequence
1032           with supplement information.
1033        """
1034        info["magic"] = POSIX_MAGIC
1035        pax_headers = self.pax_headers.copy()
1036
1037        # Test string fields for values that exceed the field length or cannot
1038        # be represented in ASCII encoding.
1039        for name, hname, length in (
1040                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1041                ("uname", "uname", 32), ("gname", "gname", 32)):
1042
1043            if hname in pax_headers:
1044                # The pax header has priority.
1045                continue
1046
1047            val = info[name].decode(encoding, errors)
1048
1049            # Try to encode the string as ASCII.
1050            try:
1051                val.encode("ascii")
1052            except UnicodeEncodeError:
1053                pax_headers[hname] = val
1054                continue
1055
1056            if len(info[name]) > length:
1057                pax_headers[hname] = val
1058
1059        # Test number fields for values that exceed the field limit or values
1060        # that like to be stored as float.
1061        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1062            if name in pax_headers:
1063                # The pax header has priority. Avoid overflow.
1064                info[name] = 0
1065                continue
1066
1067            val = info[name]
1068            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1069                pax_headers[name] = unicode(val)
1070                info[name] = 0
1071
1072        # Create a pax extended header if necessary.
1073        if pax_headers:
1074            buf = self._create_pax_generic_header(pax_headers)
1075        else:
1076            buf = ""
1077
1078        return buf + self._create_header(info, USTAR_FORMAT)
1079
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
           pax_headers is handed to _create_pax_generic_header()
           together with the XGLTYPE member type.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1085
1086    def _posix_split_name(self, name):
1087        """Split a name longer than 100 chars into a prefix
1088           and a name part.
1089        """
1090        prefix = name[:LENGTH_PREFIX + 1]
1091        while prefix and prefix[-1] != "/":
1092            prefix = prefix[:-1]
1093
1094        name = name[len(prefix):]
1095        prefix = prefix[:-1]
1096
1097        if not prefix or len(name) > LENGTH_NAME:
1098            raise ValueError("name is too long")
1099        return prefix, name
1100
    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
           Keys that are missing from info fall back to the defaults
           given in the info.get() calls below.
        """
        # The header fields in the order they are written. The numeric
        # argument of stn() and itn() is the width of the field.
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 07777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            "        ", # checksum field, blank while the checksum is computed
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", ""), 32),
            stn(info.get("gname", ""), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        # struct's "s" format pads the joined fields with null bytes
        # up to BLOCKSIZE bytes.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite seven bytes of the blank checksum field with six
        # octal digits and a NUL. The slice bounds count from the end
        # of the block; with a 512 byte block this is the range
        # 148-154, so the last blank of the placeholder stays in place
        # (assumes BLOCKSIZE is 512 -- defined outside this block).
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf
1128
1129    @staticmethod
1130    def _create_payload(payload):
1131        """Return the string payload filled with zero bytes
1132           up to the next 512 byte border.
1133        """
1134        blocks, remainder = divmod(len(payload), BLOCKSIZE)
1135        if remainder > 0:
1136            payload += (BLOCKSIZE - remainder) * NUL
1137        return payload
1138
1139    @classmethod
1140    def _create_gnu_long_header(cls, name, type):
1141        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1142           for name.
1143        """
1144        name += NUL
1145
1146        info = {}
1147        info["name"] = "././@LongLink"
1148        info["type"] = type
1149        info["size"] = len(name)
1150        info["magic"] = GNU_MAGIC
1151
1152        # create extended header + name blocks.
1153        return cls._create_header(info, USTAR_FORMAT) + \
1154                cls._create_payload(name)
1155
1156    @classmethod
1157    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1158        """Return a POSIX.1-2001 extended or global header sequence
1159           that contains a list of keyword, value pairs. The values
1160           must be unicode objects.
1161        """
1162        records = []
1163        for keyword, value in pax_headers.iteritems():
1164            keyword = keyword.encode("utf8")
1165            value = value.encode("utf8")
1166            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1167            n = p = 0
1168            while True:
1169                n = l + len(str(p))
1170                if n == p:
1171                    break
1172                p = n
1173            records.append("%d %s=%s\n" % (p, keyword, value))
1174        records = "".join(records)
1175
1176        # We use a hardcoded "././@PaxHeader" name like star does
1177        # instead of the one that POSIX recommends.
1178        info = {}
1179        info["name"] = "././@PaxHeader"
1180        info["type"] = type
1181        info["size"] = len(records)
1182        info["magic"] = POSIX_MAGIC
1183
1184        # Create pax header + record blocks.
1185        return cls._create_header(info, USTAR_FORMAT) + \
1186                cls._create_payload(records)
1187
1188    @classmethod
1189    def frombuf(cls, buf):
1190        """Construct a TarInfo object from a 512 byte string buffer.
1191        """
1192        if len(buf) == 0:
1193            raise EmptyHeaderError("empty header")
1194        if len(buf) != BLOCKSIZE:
1195            raise TruncatedHeaderError("truncated header")
1196        if buf.count(NUL) == BLOCKSIZE:
1197            raise EOFHeaderError("end of file header")
1198
1199        chksum = nti(buf[148:156])
1200        if chksum not in calc_chksums(buf):
1201            raise InvalidHeaderError("bad checksum")
1202
1203        obj = cls()
1204        obj.buf = buf
1205        obj.name = nts(buf[0:100])
1206        obj.mode = nti(buf[100:108])
1207        obj.uid = nti(buf[108:116])
1208        obj.gid = nti(buf[116:124])
1209        obj.size = nti(buf[124:136])
1210        obj.mtime = nti(buf[136:148])
1211        obj.chksum = chksum
1212        obj.type = buf[156:157]
1213        obj.linkname = nts(buf[157:257])
1214        obj.uname = nts(buf[265:297])
1215        obj.gname = nts(buf[297:329])
1216        obj.devmajor = nti(buf[329:337])
1217        obj.devminor = nti(buf[337:345])
1218        prefix = nts(buf[345:500])
1219
1220        # Old V7 tar format represents a directory as a regular
1221        # file with a trailing slash.
1222        if obj.type == AREGTYPE and obj.name.endswith("/"):
1223            obj.type = DIRTYPE
1224
1225        # Remove redundant slashes from directories.
1226        if obj.isdir():
1227            obj.name = obj.name.rstrip("/")
1228
1229        # Reconstruct a ustar longname.
1230        if prefix and obj.type not in GNU_TYPES:
1231            obj.name = prefix + "/" + obj.name
1232        return obj
1233
1234    @classmethod
1235    def fromtarfile(cls, tarfile):
1236        """Return the next TarInfo object from TarFile object
1237           tarfile.
1238        """
1239        buf = tarfile.fileobj.read(BLOCKSIZE)
1240        obj = cls.frombuf(buf)
1241        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1242        return obj._proc_member(tarfile)
1243
1244    #--------------------------------------------------------------------------
    # The following methods are called depending on the type of a member.
    # The entry point is _proc_member(), which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
1250    # 1. Set self.offset_data to the position where the data blocks begin,
1251    #    if there is data that follows.
1252    # 2. Set tarfile.offset to the position where the next member's header will
1253    #    begin.
1254    # 3. Return self or another valid TarInfo object.
1255    def _proc_member(self, tarfile):
1256        """Choose the right processing method depending on
1257           the type and call it.
1258        """
1259        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1260            return self._proc_gnulong(tarfile)
1261        elif self.type == GNUTYPE_SPARSE:
1262            return self._proc_sparse(tarfile)
1263        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1264            return self._proc_pax(tarfile)
1265        else:
1266            return self._proc_builtin(tarfile)
1267
1268    def _proc_builtin(self, tarfile):
1269        """Process a builtin type or an unknown type which
1270           will be treated as a regular file.
1271        """
1272        self.offset_data = tarfile.fileobj.tell()
1273        offset = self.offset_data
1274        if self.isreg() or self.type not in SUPPORTED_TYPES:
1275            # Skip the following data blocks.
1276            offset += self._block(self.size)
1277        tarfile.offset = offset
1278
1279        # Patch the TarInfo object with saved global
1280        # header information.
1281        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1282
1283        return self
1284
1285    def _proc_gnulong(self, tarfile):
1286        """Process the blocks that hold a GNU longname
1287           or longlink member.
1288        """
1289        buf = tarfile.fileobj.read(self._block(self.size))
1290
1291        # Fetch the next header and process it.
1292        try:
1293            next = self.fromtarfile(tarfile)
1294        except HeaderError:
1295            raise SubsequentHeaderError("missing or bad subsequent header")
1296
1297        # Patch the TarInfo object from the next header with
1298        # the longname information.
1299        next.offset = self.offset
1300        if self.type == GNUTYPE_LONGNAME:
1301            next.name = nts(buf)
1302        elif self.type == GNUTYPE_LONGLINK:
1303            next.linkname = nts(buf)
1304
1305        return next
1306
1307    def _proc_sparse(self, tarfile):
1308        """Process a GNU sparse header plus extra headers.
1309        """
1310        buf = self.buf
1311        sp = _ringbuffer()
1312        pos = 386
1313        lastpos = 0L
1314        realpos = 0L
1315        # There are 4 possible sparse structs in the
1316        # first header.
1317        for i in xrange(4):
1318            try:
1319                offset = nti(buf[pos:pos + 12])
1320                numbytes = nti(buf[pos + 12:pos + 24])
1321            except ValueError:
1322                break
1323            if offset > lastpos:
1324                sp.append(_hole(lastpos, offset - lastpos))
1325            sp.append(_data(offset, numbytes, realpos))
1326            realpos += numbytes
1327            lastpos = offset + numbytes
1328            pos += 24
1329
1330        isextended = ord(buf[482])
1331        origsize = nti(buf[483:495])
1332
1333        # If the isextended flag is given,
1334        # there are extra headers to process.
1335        while isextended == 1:
1336            buf = tarfile.fileobj.read(BLOCKSIZE)
1337            pos = 0
1338            for i in xrange(21):
1339                try:
1340                    offset = nti(buf[pos:pos + 12])
1341                    numbytes = nti(buf[pos + 12:pos + 24])
1342                except ValueError:
1343                    break
1344                if offset > lastpos:
1345                    sp.append(_hole(lastpos, offset - lastpos))
1346                sp.append(_data(offset, numbytes, realpos))
1347                realpos += numbytes
1348                lastpos = offset + numbytes
1349                pos += 24
1350            isextended = ord(buf[504])
1351
1352        if lastpos < origsize:
1353            sp.append(_hole(lastpos, origsize - lastpos))
1354
1355        self.sparse = sp
1356
1357        self.offset_data = tarfile.fileobj.tell()
1358        tarfile.offset = self.offset_data + self._block(self.size)
1359        self.size = origsize
1360
1361        return self
1362
1363    def _proc_pax(self, tarfile):
1364        """Process an extended or global header as described in
1365           POSIX.1-2001.
1366        """
1367        # Read the header information.
1368        buf = tarfile.fileobj.read(self._block(self.size))
1369
1370        # A pax header stores supplemental information for either
1371        # the following file (extended) or all following files
1372        # (global).
1373        if self.type == XGLTYPE:
1374            pax_headers = tarfile.pax_headers
1375        else:
1376            pax_headers = tarfile.pax_headers.copy()
1377
1378        # Parse pax header information. A record looks like that:
1379        # "%d %s=%s\n" % (length, keyword, value). length is the size
1380        # of the complete record including the length field itself and
1381        # the newline. keyword and value are both UTF-8 encoded strings.
1382        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1383        pos = 0
1384        while True:
1385            match = regex.match(buf, pos)
1386            if not match:
1387                break
1388
1389            length, keyword = match.groups()
1390            length = int(length)
1391            value = buf[match.end(2) + 1:match.start(1) + length - 1]
1392
1393            keyword = keyword.decode("utf8")
1394            value = value.decode("utf8")
1395
1396            pax_headers[keyword] = value
1397            pos += length
1398
1399        # Fetch the next header.
1400        try:
1401            next = self.fromtarfile(tarfile)
1402        except HeaderError:
1403            raise SubsequentHeaderError("missing or bad subsequent header")
1404
1405        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1406            # Patch the TarInfo object with the extended header info.
1407            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1408            next.offset = self.offset
1409
1410            if "size" in pax_headers:
1411                # If the extended header replaces the size field,
1412                # we need to recalculate the offset where the next
1413                # header starts.
1414                offset = next.offset_data
1415                if next.isreg() or next.type not in SUPPORTED_TYPES:
1416                    offset += next._block(next.size)
1417                tarfile.offset = offset
1418
1419        return next
1420
1421    def _apply_pax_info(self, pax_headers, encoding, errors):
1422        """Replace fields with supplemental information from a previous
1423           pax extended or global header.
1424        """
1425        for keyword, value in pax_headers.iteritems():
1426            if keyword not in PAX_FIELDS:
1427                continue
1428
1429            if keyword == "path":
1430                value = value.rstrip("/")
1431
1432            if keyword in PAX_NUMBER_FIELDS:
1433                try:
1434                    value = PAX_NUMBER_FIELDS[keyword](value)
1435                except ValueError:
1436                    value = 0
1437            else:
1438                value = uts(value, encoding, errors)
1439
1440            setattr(self, keyword, value)
1441
1442        self.pax_headers = pax_headers.copy()
1443
1444    def _block(self, count):
1445        """Round up a byte count by BLOCKSIZE and return it,
1446           e.g. _block(834) => 1024.
1447        """
1448        blocks, remainder = divmod(count, BLOCKSIZE)
1449        if remainder:
1450            blocks += 1
1451        return blocks * BLOCKSIZE
1452
    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES.
        """
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg().
        """
        return self.isreg()
    def isdir(self):
        """Return True if the member's type is DIRTYPE.
        """
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member's type is SYMTYPE.
        """
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member's type is LNKTYPE.
        """
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member's type is CHRTYPE.
        """
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member's type is BLKTYPE.
        """
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member's type is FIFOTYPE.
        """
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member's type is GNUTYPE_SPARSE.
        """
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member's type is CHRTYPE, BLKTYPE
           or FIFOTYPE.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1473# class TarInfo
1474
1475class TarFile(object):
1476    """The TarFile Class provides an interface to tar archives.
1477    """
1478
1479    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1480
1481    dereference = False         # If true, add content of linked file to the
1482                                # tar file, else the link.
1483
1484    ignore_zeros = False        # If true, skips empty or invalid blocks and
1485                                # continues processing.
1486
1487    errorlevel = 1              # If 0, fatal errors only appear in debug
1488                                # messages (if debug >= 0). If > 0, errors
1489                                # are passed to the caller as exceptions.
1490
1491    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1492
1493    encoding = ENCODING         # Encoding for 8-bit character strings.
1494
1495    errors = None               # Error handler for unicode conversion.
1496
1497    tarinfo = TarInfo           # The default TarInfo class to use.
1498
1499    fileobject = ExFileObject   # The default ExFileObject class to use.
1500
1501    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1502            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1503            errors=None, pax_headers=None, debug=None, errorlevel=None):
1504        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1505           read from an existing archive, 'a' to append data to an existing
1506           file or 'w' to create a new file overwriting an existing one. `mode'
1507           defaults to 'r'.
1508           If `fileobj' is given, it is used for reading or writing data. If it
1509           can be determined, `mode' is overridden by `fileobj's mode.
1510           `fileobj' is not closed, when TarFile is closed.
1511        """
1512        if len(mode) > 1 or mode not in "raw":
1513            raise ValueError("mode must be 'r', 'a' or 'w'")
1514        self.mode = mode
1515        self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1516
1517        if not fileobj:
1518            if self.mode == "a" and not os.path.exists(name):
1519                # Create nonexistent files in append mode.
1520                self.mode = "w"
1521                self._mode = "wb"
1522            fileobj = bltn_open(name, self._mode)
1523            self._extfileobj = False
1524        else:
1525            if name is None and hasattr(fileobj, "name"):
1526                name = fileobj.name
1527            if hasattr(fileobj, "mode"):
1528                self._mode = fileobj.mode
1529            self._extfileobj = True
1530        self.name = os.path.abspath(name) if name else None
1531        self.fileobj = fileobj
1532
1533        # Init attributes.
1534        if format is not None:
1535            self.format = format
1536        if tarinfo is not None:
1537            self.tarinfo = tarinfo
1538        if dereference is not None:
1539            self.dereference = dereference
1540        if ignore_zeros is not None:
1541            self.ignore_zeros = ignore_zeros
1542        if encoding is not None:
1543            self.encoding = encoding
1544
1545        if errors is not None:
1546            self.errors = errors
1547        elif mode == "r":
1548            self.errors = "utf-8"
1549        else:
1550            self.errors = "strict"
1551
1552        if pax_headers is not None and self.format == PAX_FORMAT:
1553            self.pax_headers = pax_headers
1554        else:
1555            self.pax_headers = {}
1556
1557        if debug is not None:
1558            self.debug = debug
1559        if errorlevel is not None:
1560            self.errorlevel = errorlevel
1561
1562        # Init datastructures.
1563        self.closed = False
1564        self.members = []       # list of members as TarInfo objects
1565        self._loaded = False    # flag if all members have been read
1566        self.offset = self.fileobj.tell()
1567                                # current position in the archive file
1568        self.inodes = {}        # dictionary caching the inodes of
1569                                # archive members already added
1570
1571        try:
1572            if self.mode == "r":
1573                self.firstmember = None
1574                self.firstmember = self.next()
1575
1576            if self.mode == "a":
1577                # Move to the end of the archive,
1578                # before the first empty block.
1579                while True:
1580                    self.fileobj.seek(self.offset)
1581                    try:
1582                        tarinfo = self.tarinfo.fromtarfile(self)
1583                        self.members.append(tarinfo)
1584                    except EOFHeaderError:
1585                        self.fileobj.seek(self.offset)
1586                        break
1587                    except HeaderError, e:
1588                        raise ReadError(str(e))
1589
1590            if self.mode in "aw":
1591                self._loaded = True
1592
1593                if self.pax_headers:
1594                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1595                    self.fileobj.write(buf)
1596                    self.offset += len(buf)
1597        except:
1598            if not self._extfileobj:
1599                self.fileobj.close()
1600            self.closed = True
1601            raise
1602
1603    def _getposix(self):
1604        return self.format == USTAR_FORMAT
1605    def _setposix(self, value):
1606        import warnings
1607        warnings.warn("use the format attribute instead", DeprecationWarning,
1608                      2)
1609        if value:
1610            self.format = USTAR_FORMAT
1611        else:
1612            self.format = GNU_FORMAT
1613    posix = property(_getposix, _setposix)
1614
1615    #--------------------------------------------------------------------------
1616    # Below are the classmethods which act as alternate constructors to the
1617    # TarFile class. The open() method is the only one that is needed for
1618    # public use; it is the "super"-constructor and is able to select an
1619    # adequate "sub"-constructor for a particular compression using the mapping
1620    # from OPEN_METH.
1621    #
1622    # This concept allows one to subclass TarFile without losing the comfort of
1623    # the super-constructor. A sub-constructor is registered and made available
1624    # by adding it to the mapping in OPEN_METH.
1625
1626    @classmethod
1627    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1628        """Open a tar archive for reading, writing or appending. Return
1629           an appropriate TarFile class.
1630
1631           mode:
1632           'r' or 'r:*' open for reading with transparent compression
1633           'r:'         open for reading exclusively uncompressed
1634           'r:gz'       open for reading with gzip compression
1635           'r:bz2'      open for reading with bzip2 compression
1636           'a' or 'a:'  open for appending, creating the file if necessary
1637           'w' or 'w:'  open for writing without compression
1638           'w:gz'       open for writing with gzip compression
1639           'w:bz2'      open for writing with bzip2 compression
1640
1641           'r|*'        open a stream of tar blocks with transparent compression
1642           'r|'         open an uncompressed stream of tar blocks for reading
1643           'r|gz'       open a gzip compressed stream of tar blocks
1644           'r|bz2'      open a bzip2 compressed stream of tar blocks
1645           'w|'         open an uncompressed stream for writing
1646           'w|gz'       open a gzip compressed stream for writing
1647           'w|bz2'      open a bzip2 compressed stream for writing
1648        """
1649
1650        if not name and not fileobj:
1651            raise ValueError("nothing to open")
1652
1653        if mode in ("r", "r:*"):
1654            # Find out which *open() is appropriate for opening the file.
1655            for comptype in cls.OPEN_METH:
1656                func = getattr(cls, cls.OPEN_METH[comptype])
1657                if fileobj is not None:
1658                    saved_pos = fileobj.tell()
1659                try:
1660                    return func(name, "r", fileobj, **kwargs)
1661                except (ReadError, CompressionError), e:
1662                    if fileobj is not None:
1663                        fileobj.seek(saved_pos)
1664                    continue
1665            raise ReadError("file could not be opened successfully")
1666
1667        elif ":" in mode:
1668            filemode, comptype = mode.split(":", 1)
1669            filemode = filemode or "r"
1670            comptype = comptype or "tar"
1671
1672            # Select the *open() function according to
1673            # given compression.
1674            if comptype in cls.OPEN_METH:
1675                func = getattr(cls, cls.OPEN_METH[comptype])
1676            else:
1677                raise CompressionError("unknown compression type %r" % comptype)
1678            return func(name, filemode, fileobj, **kwargs)
1679
1680        elif "|" in mode:
1681            filemode, comptype = mode.split("|", 1)
1682            filemode = filemode or "r"
1683            comptype = comptype or "tar"
1684
1685            if filemode not in "rw":
1686                raise ValueError("mode must be 'r' or 'w'")
1687
1688            t = cls(name, filemode,
1689                    _Stream(name, filemode, comptype, fileobj, bufsize),
1690                    **kwargs)
1691            t._extfileobj = False
1692            return t
1693
1694        elif mode in "aw":
1695            return cls.taropen(name, mode, fileobj, **kwargs)
1696
1697        raise ValueError("undiscernible mode")
1698
1699    @classmethod
1700    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1701        """Open uncompressed tar archive name for reading or writing.
1702        """
1703        if len(mode) > 1 or mode not in "raw":
1704            raise ValueError("mode must be 'r', 'a' or 'w'")
1705        return cls(name, mode, fileobj, **kwargs)
1706
1707    @classmethod
1708    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1709        """Open gzip compressed tar archive name for reading or writing.
1710           Appending is not allowed.
1711        """
1712        if len(mode) > 1 or mode not in "rw":
1713            raise ValueError("mode must be 'r' or 'w'")
1714
1715        try:
1716            import gzip
1717            gzip.GzipFile
1718        except (ImportError, AttributeError):
1719            raise CompressionError("gzip module is not available")
1720
1721        if fileobj is None:
1722            fileobj = bltn_open(name, mode + "b")
1723
1724        try:
1725            t = cls.taropen(name, mode,
1726                gzip.GzipFile(name, mode, compresslevel, fileobj),
1727                **kwargs)
1728        except IOError:
1729            raise ReadError("not a gzip file")
1730        t._extfileobj = False
1731        return t
1732
1733    @classmethod
1734    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1735        """Open bzip2 compressed tar archive name for reading or writing.
1736           Appending is not allowed.
1737        """
1738        if len(mode) > 1 or mode not in "rw":
1739            raise ValueError("mode must be 'r' or 'w'.")
1740
1741        try:
1742            import bz2
1743        except ImportError:
1744            raise CompressionError("bz2 module is not available")
1745
1746        if fileobj is not None:
1747            fileobj = _BZ2Proxy(fileobj, mode)
1748        else:
1749            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1750
1751        try:
1752            t = cls.taropen(name, mode, fileobj, **kwargs)
1753        except (IOError, EOFError):
1754            raise ReadError("not a bzip2 file")
1755        t._extfileobj = False
1756        return t
1757
    # All *open() methods are registered here.
    # Each key is a compression type name, each value the name of the
    # classmethod of this class that opens archives of that type.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1764
1765    #--------------------------------------------------------------------------
1766    # The public methods which TarFile provides:
1767
1768    def close(self):
1769        """Close the TarFile. In write-mode, two finishing zero blocks are
1770           appended to the archive.
1771        """
1772        if self.closed:
1773            return
1774
1775        if self.mode in "aw":
1776            self.fileobj.write(NUL * (BLOCKSIZE * 2))
1777            self.offset += (BLOCKSIZE * 2)
1778            # fill up the end with zero-blocks
1779            # (like option -b20 for tar does)
1780            blocks, remainder = divmod(self.offset, RECORDSIZE)
1781            if remainder > 0:
1782                self.fileobj.write(NUL * (RECORDSIZE - remainder))
1783
1784        if not self._extfileobj:
1785            self.fileobj.close()
1786        self.closed = True
1787
1788    def getmember(self, name):
1789        """Return a TarInfo object for member `name'. If `name' can not be
1790           found in the archive, KeyError is raised. If a member occurs more
1791           than once in the archive, its last occurrence is assumed to be the
1792           most up-to-date version.
1793        """
1794        tarinfo = self._getmember(name)
1795        if tarinfo is None:
1796            raise KeyError("filename %r not found" % name)
1797        return tarinfo
1798
1799    def getmembers(self):
1800        """Return the members of the archive as a list of TarInfo objects. The
1801           list has the same order as the members in the archive.
1802        """
1803        self._check()
1804        if not self._loaded:    # if we want to obtain a list of
1805            self._load()        # all members, we first have to
1806                                # scan the whole archive.
1807        return self.members
1808
1809    def getnames(self):
1810        """Return the members of the archive as a list of their names. It has
1811           the same order as the list returned by getmembers().
1812        """
1813        return [tarinfo.name for tarinfo in self.getmembers()]
1814
1815    def gettarinfo(self, name=None, arcname=None, fileobj=None):
1816        """Create a TarInfo object for either the file `name' or the file
1817           object `fileobj' (using os.fstat on its file descriptor). You can
1818           modify some of the TarInfo's attributes before you add it using
1819           addfile(). If given, `arcname' specifies an alternative name for the
1820           file in the archive.
1821        """
1822        self._check("aw")
1823
1824        # When fileobj is given, replace name by
1825        # fileobj's real name.
1826        if fileobj is not None:
1827            name = fileobj.name
1828
1829        # Building the name of the member in the archive.
1830        # Backward slashes are converted to forward slashes,
1831        # Absolute paths are turned to relative paths.
1832        if arcname is None:
1833            arcname = name
1834        drv, arcname = os.path.splitdrive(arcname)
1835        arcname = arcname.replace(os.sep, "/")
1836        arcname = arcname.lstrip("/")
1837
1838        # Now, fill the TarInfo object with
1839        # information specific for the file.
1840        tarinfo = self.tarinfo()
1841        tarinfo.tarfile = self
1842
1843        # Use os.stat or os.lstat, depending on platform
1844        # and if symlinks shall be resolved.
1845        if fileobj is None:
1846            if hasattr(os, "lstat") and not self.dereference:
1847                statres = os.lstat(name)
1848            else:
1849                statres = os.stat(name)
1850        else:
1851            statres = os.fstat(fileobj.fileno())
1852        linkname = ""
1853
1854        stmd = statres.st_mode
1855        if stat.S_ISREG(stmd):
1856            inode = (statres.st_ino, statres.st_dev)
1857            if not self.dereference and statres.st_nlink > 1 and \
1858                    inode in self.inodes and arcname != self.inodes[inode]:
1859                # Is it a hardlink to an already
1860                # archived file?
1861                type = LNKTYPE
1862                linkname = self.inodes[inode]
1863            else:
1864                # The inode is added only if its valid.
1865                # For win32 it is always 0.
1866                type = REGTYPE
1867                if inode[0]:
1868                    self.inodes[inode] = arcname
1869        elif stat.S_ISDIR(stmd):
1870            type = DIRTYPE
1871        elif stat.S_ISFIFO(stmd):
1872            type = FIFOTYPE
1873        elif stat.S_ISLNK(stmd):
1874            type = SYMTYPE
1875            linkname = os.readlink(name)
1876        elif stat.S_ISCHR(stmd):
1877            type = CHRTYPE
1878        elif stat.S_ISBLK(stmd):
1879            type = BLKTYPE
1880        else:
1881            return None
1882
1883        # Fill the TarInfo object with all
1884        # information we can get.
1885        tarinfo.name = arcname
1886        tarinfo.mode = stmd
1887        tarinfo.uid = statres.st_uid
1888        tarinfo.gid = statres.st_gid
1889        if type == REGTYPE:
1890            tarinfo.size = statres.st_size
1891        else:
1892            tarinfo.size = 0L
1893        tarinfo.mtime = statres.st_mtime
1894        tarinfo.type = type
1895        tarinfo.linkname = linkname
1896        if pwd:
1897            try:
1898                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1899            except KeyError:
1900                pass
1901        if grp:
1902            try:
1903                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1904            except KeyError:
1905                pass
1906
1907        if type in (CHRTYPE, BLKTYPE):
1908            if hasattr(os, "major") and hasattr(os, "minor"):
1909                tarinfo.devmajor = os.major(statres.st_rdev)
1910                tarinfo.devminor = os.minor(statres.st_rdev)
1911        return tarinfo
1912
1913    def list(self, verbose=True):
1914        """Print a table of contents to sys.stdout. If `verbose' is False, only
1915           the names of the members are printed. If it is True, an `ls -l'-like
1916           output is produced.
1917        """
1918        self._check()
1919
1920        for tarinfo in self:
1921            if verbose:
1922                print filemode(tarinfo.mode),
1923                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1924                                 tarinfo.gname or tarinfo.gid),
1925                if tarinfo.ischr() or tarinfo.isblk():
1926                    print "%10s" % ("%d,%d" \
1927                                    % (tarinfo.devmajor, tarinfo.devminor)),
1928                else:
1929                    print "%10d" % tarinfo.size,
1930                print "%d-%02d-%02d %02d:%02d:%02d" \
1931                      % time.localtime(tarinfo.mtime)[:6],
1932
1933            print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1934
1935            if verbose:
1936                if tarinfo.issym():
1937                    print "->", tarinfo.linkname,
1938                if tarinfo.islnk():
1939                    print "link to", tarinfo.linkname,
1940            print
1941
1942    def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
1943        """Add the file `name' to the archive. `name' may be any type of file
1944           (directory, fifo, symbolic link, etc.). If given, `arcname'
1945           specifies an alternative name for the file in the archive.
1946           Directories are added recursively by default. This can be avoided by
1947           setting `recursive' to False. `exclude' is a function that should
1948           return True for each filename to be excluded. `filter' is a function
1949           that expects a TarInfo object argument and returns the changed
1950           TarInfo object, if it returns None the TarInfo object will be
1951           excluded from the archive.
1952        """
1953        self._check("aw")
1954
1955        if arcname is None:
1956            arcname = name
1957
1958        # Exclude pathnames.
1959        if exclude is not None:
1960            import warnings
1961            warnings.warn("use the filter argument instead",
1962                    DeprecationWarning, 2)
1963            if exclude(name):
1964                self._dbg(2, "tarfile: Excluded %r" % name)
1965                return
1966
1967        # Skip if somebody tries to archive the archive...
1968        if self.name is not None and os.path.abspath(name) == self.name:
1969            self._dbg(2, "tarfile: Skipped %r" % name)
1970            return
1971
1972        self._dbg(1, name)
1973
1974        # Create a TarInfo object from the file.
1975        tarinfo = self.gettarinfo(name, arcname)
1976
1977        if tarinfo is None:
1978            self._dbg(1, "tarfile: Unsupported type %r" % name)
1979            return
1980
1981        # Change or exclude the TarInfo object.
1982        if filter is not None:
1983            tarinfo = filter(tarinfo)
1984            if tarinfo is None:
1985                self._dbg(2, "tarfile: Excluded %r" % name)
1986                return
1987
1988        # Append the tar header and data to the archive.
1989        if tarinfo.isreg():
1990            with bltn_open(name, "rb") as f:
1991                self.addfile(tarinfo, f)
1992
1993        elif tarinfo.isdir():
1994            self.addfile(tarinfo)
1995            if recursive:
1996                for f in os.listdir(name):
1997                    self.add(os.path.join(name, f), os.path.join(arcname, f),
1998                            recursive, exclude, filter)
1999
2000        else:
2001            self.addfile(tarinfo)
2002
2003    def addfile(self, tarinfo, fileobj=None):
2004        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2005           given, tarinfo.size bytes are read from it and added to the archive.
2006           You can create TarInfo objects using gettarinfo().
2007           On Windows platforms, `fileobj' should always be opened with mode
2008           'rb' to avoid irritation about the file size.
2009        """
2010        self._check("aw")
2011
2012        tarinfo = copy.copy(tarinfo)
2013
2014        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2015        self.fileobj.write(buf)
2016        self.offset += len(buf)
2017
2018        # If there's data to follow, append it.
2019        if fileobj is not None:
2020            copyfileobj(fileobj, self.fileobj, tarinfo.size)
2021            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2022            if remainder > 0:
2023                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2024                blocks += 1
2025            self.offset += blocks * BLOCKSIZE
2026
2027        self.members.append(tarinfo)
2028
2029    def extractall(self, path=".", members=None):
2030        """Extract all members from the archive to the current working
2031           directory and set owner, modification time and permissions on
2032           directories afterwards. `path' specifies a different directory
2033           to extract to. `members' is optional and must be a subset of the
2034           list returned by getmembers().
2035        """
2036        directories = []
2037
2038        if members is None:
2039            members = self
2040
2041        for tarinfo in members:
2042            if tarinfo.isdir():
2043                # Extract directories with a safe mode.
2044                directories.append(tarinfo)
2045                tarinfo = copy.copy(tarinfo)
2046                tarinfo.mode = 0700
2047            self.extract(tarinfo, path)
2048
2049        # Reverse sort directories.
2050        directories.sort(key=operator.attrgetter('name'))
2051        directories.reverse()
2052
2053        # Set correct owner, mtime and filemode on directories.
2054        for tarinfo in directories:
2055            dirpath = os.path.join(path, tarinfo.name)
2056            try:
2057                self.chown(tarinfo, dirpath)
2058                self.utime(tarinfo, dirpath)
2059                self.chmod(tarinfo, dirpath)
2060            except ExtractError, e:
2061                if self.errorlevel > 1:
2062                    raise
2063                else:
2064                    self._dbg(1, "tarfile: %s" % e)
2065
2066    def extract(self, member, path=""):
2067        """Extract a member from the archive to the current working directory,
2068           using its full name. Its file information is extracted as accurately
2069           as possible. `member' may be a filename or a TarInfo object. You can
2070           specify a different directory using `path'.
2071        """
2072        self._check("r")
2073
2074        if isinstance(member, basestring):
2075            tarinfo = self.getmember(member)
2076        else:
2077            tarinfo = member
2078
2079        # Prepare the link target for makelink().
2080        if tarinfo.islnk():
2081            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2082
2083        try:
2084            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2085        except EnvironmentError, e:
2086            if self.errorlevel > 0:
2087                raise
2088            else:
2089                if e.filename is None:
2090                    self._dbg(1, "tarfile: %s" % e.strerror)
2091                else:
2092                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2093        except ExtractError, e:
2094            if self.errorlevel > 1:
2095                raise
2096            else:
2097                self._dbg(1, "tarfile: %s" % e)
2098
2099    def extractfile(self, member):
2100        """Extract a member from the archive as a file object. `member' may be
2101           a filename or a TarInfo object. If `member' is a regular file, a
2102           file-like object is returned. If `member' is a link, a file-like
2103           object is constructed from the link's target. If `member' is none of
2104           the above, None is returned.
2105           The file-like object is read-only and provides the following
2106           methods: read(), readline(), readlines(), seek() and tell()
2107        """
2108        self._check("r")
2109
2110        if isinstance(member, basestring):
2111            tarinfo = self.getmember(member)
2112        else:
2113            tarinfo = member
2114
2115        if tarinfo.isreg():
2116            return self.fileobject(self, tarinfo)
2117
2118        elif tarinfo.type not in SUPPORTED_TYPES:
2119            # If a member's type is unknown, it is treated as a
2120            # regular file.
2121            return self.fileobject(self, tarinfo)
2122
2123        elif tarinfo.islnk() or tarinfo.issym():
2124            if isinstance(self.fileobj, _Stream):
2125                # A small but ugly workaround for the case that someone tries
2126                # to extract a (sym)link as a file-object from a non-seekable
2127                # stream of tar blocks.
2128                raise StreamError("cannot extract (sym)link as file object")
2129            else:
2130                # A (sym)link's file object is its target's file object.
2131                return self.extractfile(self._find_link_target(tarinfo))
2132        else:
2133            # If there's no data associated with the member (directory, chrdev,
2134            # blkdev, etc.), return None instead of a file object.
2135            return None
2136
2137    def _extract_member(self, tarinfo, targetpath):
2138        """Extract the TarInfo object tarinfo to a physical
2139           file called targetpath.
2140        """
2141        # Fetch the TarInfo object for the given name
2142        # and build the destination pathname, replacing
2143        # forward slashes to platform specific separators.
2144        targetpath = targetpath.rstrip("/")
2145        targetpath = targetpath.replace("/", os.sep)
2146
2147        # Create all upper directories.
2148        upperdirs = os.path.dirname(targetpath)
2149        if upperdirs and not os.path.exists(upperdirs):
2150            # Create directories that are not part of the archive with
2151            # default permissions.
2152            os.makedirs(upperdirs)
2153
2154        if tarinfo.islnk() or tarinfo.issym():
2155            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2156        else:
2157            self._dbg(1, tarinfo.name)
2158
2159        if tarinfo.isreg():
2160            self.makefile(tarinfo, targetpath)
2161        elif tarinfo.isdir():
2162            self.makedir(tarinfo, targetpath)
2163        elif tarinfo.isfifo():
2164            self.makefifo(tarinfo, targetpath)
2165        elif tarinfo.ischr() or tarinfo.isblk():
2166            self.makedev(tarinfo, targetpath)
2167        elif tarinfo.islnk() or tarinfo.issym():
2168            self.makelink(tarinfo, targetpath)
2169        elif tarinfo.type not in SUPPORTED_TYPES:
2170            self.makeunknown(tarinfo, targetpath)
2171        else:
2172            self.makefile(tarinfo, targetpath)
2173
2174        self.chown(tarinfo, targetpath)
2175        if not tarinfo.issym():
2176            self.chmod(tarinfo, targetpath)
2177            self.utime(tarinfo, targetpath)
2178
2179    #--------------------------------------------------------------------------
2180    # Below are the different file methods. They are called via
2181    # _extract_member() when extract() is called. They can be replaced in a
2182    # subclass to implement other functionality.
2183
2184    def makedir(self, tarinfo, targetpath):
2185        """Make a directory called targetpath.
2186        """
2187        try:
2188            # Use a safe mode for the directory, the real mode is set
2189            # later in _extract_member().
2190            os.mkdir(targetpath, 0700)
2191        except EnvironmentError, e:
2192            if e.errno != errno.EEXIST:
2193                raise
2194
2195    def makefile(self, tarinfo, targetpath):
2196        """Make a file called targetpath.
2197        """
2198        source = self.extractfile(tarinfo)
2199        try:
2200            with bltn_open(targetpath, "wb") as target:
2201                copyfileobj(source, target)
2202        finally:
2203            source.close()
2204
2205    def makeunknown(self, tarinfo, targetpath):
2206        """Make a file from a TarInfo object with an unknown type
2207           at targetpath.
2208        """
2209        self.makefile(tarinfo, targetpath)
2210        self._dbg(1, "tarfile: Unknown file type %r, " \
2211                     "extracted as regular file." % tarinfo.type)
2212
2213    def makefifo(self, tarinfo, targetpath):
2214        """Make a fifo called targetpath.
2215        """
2216        if hasattr(os, "mkfifo"):
2217            os.mkfifo(targetpath)
2218        else:
2219            raise ExtractError("fifo not supported by system")
2220
2221    def makedev(self, tarinfo, targetpath):
2222        """Make a character or block device called targetpath.
2223        """
2224        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2225            raise ExtractError("special devices not supported by system")
2226
2227        mode = tarinfo.mode
2228        if tarinfo.isblk():
2229            mode |= stat.S_IFBLK
2230        else:
2231            mode |= stat.S_IFCHR
2232
2233        os.mknod(targetpath, mode,
2234                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2235
2236    def makelink(self, tarinfo, targetpath):
2237        """Make a (symbolic) link called targetpath. If it cannot be created
2238          (platform limitation), we try to make a copy of the referenced file
2239          instead of a link.
2240        """
2241        if hasattr(os, "symlink") and hasattr(os, "link"):
2242            # For systems that support symbolic and hard links.
2243            if tarinfo.issym():
2244                if os.path.lexists(targetpath):
2245                    os.unlink(targetpath)
2246                os.symlink(tarinfo.linkname, targetpath)
2247            else:
2248                # See extract().
2249                if os.path.exists(tarinfo._link_target):
2250                    if os.path.lexists(targetpath):
2251                        os.unlink(targetpath)
2252                    os.link(tarinfo._link_target, targetpath)
2253                else:
2254                    self._extract_member(self._find_link_target(tarinfo), targetpath)
2255        else:
2256            try:
2257                self._extract_member(self._find_link_target(tarinfo), targetpath)
2258            except KeyError:
2259                raise ExtractError("unable to resolve link inside archive")
2260
2261    def chown(self, tarinfo, targetpath):
2262        """Set owner of targetpath according to tarinfo.
2263        """
2264        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2265            # We have to be root to do so.
2266            try:
2267                g = grp.getgrnam(tarinfo.gname)[2]
2268            except KeyError:
2269                g = tarinfo.gid
2270            try:
2271                u = pwd.getpwnam(tarinfo.uname)[2]
2272            except KeyError:
2273                u = tarinfo.uid
2274            try:
2275                if tarinfo.issym() and hasattr(os, "lchown"):
2276                    os.lchown(targetpath, u, g)
2277                else:
2278                    if sys.platform != "os2emx":
2279                        os.chown(targetpath, u, g)
2280            except EnvironmentError, e:
2281                raise ExtractError("could not change owner")
2282
2283    def chmod(self, tarinfo, targetpath):
2284        """Set file permissions of targetpath according to tarinfo.
2285        """
2286        if hasattr(os, 'chmod'):
2287            try:
2288                os.chmod(targetpath, tarinfo.mode)
2289            except EnvironmentError, e:
2290                raise ExtractError("could not change mode")
2291
2292    def utime(self, tarinfo, targetpath):
2293        """Set modification time of targetpath according to tarinfo.
2294        """
2295        if not hasattr(os, 'utime'):
2296            return
2297        try:
2298            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2299        except EnvironmentError, e:
2300            raise ExtractError("could not change modification time")
2301
2302    #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.

           Every member that is read is appended to self.members. When no
           further member can be read, the archive is marked as completely
           loaded. ReadError is raised for a header problem at offset 0
           and for an invalid subsequent header.
        """
        self._check("ra")
        # A member that is stored in firstmember is handed out
        # once before anything else is read from the file.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        tarinfo = None
        # Every handler below that neither continues nor raises falls
        # through to the break at the end of the loop body and leaves
        # tarinfo set to None.
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError, e:
                # With ignore_zeros, skip this block and try the next one.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError, e:
                # With ignore_zeros, skip this block as well. Otherwise an
                # invalid header is an error only at the very beginning.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    raise ReadError(str(e))
            except EmptyHeaderError:
                # An error only at offset 0, otherwise the end of the members.
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError, e:
                # An error only at offset 0, otherwise the end of the members.
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError, e:
                # Always an error, regardless of the offset.
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # Nothing more to read, the member list is complete.
            self._loaded = True

        return tarinfo
2348
2349    #--------------------------------------------------------------------------
2350    # Little helper methods:
2351
2352    def _getmember(self, name, tarinfo=None, normalize=False):
2353        """Find an archive member by name from bottom to top.
2354           If tarinfo is given, it is used as the starting point.
2355        """
2356        # Ensure that all members have been loaded.
2357        members = self.getmembers()
2358
2359        # Limit the member search list up to tarinfo.
2360        if tarinfo is not None:
2361            members = members[:members.index(tarinfo)]
2362
2363        if normalize:
2364            name = os.path.normpath(name)
2365
2366        for member in reversed(members):
2367            if normalize:
2368                member_name = os.path.normpath(member.name)
2369            else:
2370                member_name = member.name
2371
2372            if name == member_name:
2373                return member
2374
2375    def _load(self):
2376        """Read through the entire archive file and look for readable
2377           members.
2378        """
2379        while True:
2380            tarinfo = self.next()
2381            if tarinfo is None:
2382                break
2383        self._loaded = True
2384
2385    def _check(self, mode=None):
2386        """Check if TarFile is still open, and if the operation's mode
2387           corresponds to TarFile's mode.
2388        """
2389        if self.closed:
2390            raise IOError("%s is closed" % self.__class__.__name__)
2391        if mode is not None and self.mode not in mode:
2392            raise IOError("bad operation for mode %r" % self.mode)
2393
2394    def _find_link_target(self, tarinfo):
2395        """Find the target member of a symlink or hardlink member in the
2396           archive.
2397        """
2398        if tarinfo.issym():
2399            # Always search the entire archive.
2400            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2401            limit = None
2402        else:
2403            # Search the archive before the link, because a hard link is
2404            # just a reference to an already archived file.
2405            linkname = tarinfo.linkname
2406            limit = tarinfo
2407
2408        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2409        if member is None:
2410            raise KeyError("linkname %r not found" % linkname)
2411        return member
2412
2413    def __iter__(self):
2414        """Provide an iterator object.
2415        """
2416        if self._loaded:
2417            return iter(self.members)
2418        else:
2419            return TarIter(self)
2420
2421    def _dbg(self, level, msg):
2422        """Write debugging output to sys.stderr.
2423        """
2424        if level <= self.debug:
2425            print >> sys.stderr, msg
2426
2427    def __enter__(self):
2428        self._check()
2429        return self
2430
2431    def __exit__(self, type, value, traceback):
2432        if type is None:
2433            self.close()
2434        else:
2435            # An exception occurred. We must not call close() because
2436            # it would try to write end-of-archive blocks and padding.
2437            if not self._extfileobj:
2438                self.fileobj.close()
2439            self.closed = True
2440# class TarFile
2441
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for `tarfile', starting at the
           first member.
        """
        self.tarfile = tarfile
        self.index = 0
    def __iter__(self):
        """Return iterator object.
        """
        return self
    def next(self):
        """Return the next member, using TarFile's next() method where
           the member has not been read yet. When all members have been
           read, the TarFile is marked as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        # Members that were read in the meantime are therefore
        # served from the member list of the TarFile.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            tarinfo = archive.next()
        elif self.index < len(archive.members):
            tarinfo = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2479
2480# Helper classes for sparse file support
2481class _section:
2482    """Base class for _data and _hole.
2483    """
2484    def __init__(self, offset, size):
2485        self.offset = offset
2486        self.size = size
2487    def __contains__(self, offset):
2488        return self.offset <= offset < self.offset + self.size
2489
class _data(_section):
    """Represent a data section in a sparse file. In addition to offset
       and size, the section stores the position `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2496
class _hole(_section):
    """Represent a hole section in a sparse file. A hole has an offset
       and a size only, everything is inherited from _section.
    """
2501
2502class _ringbuffer(list):
2503    """Ringbuffer class which increases performance
2504       over a regular list.
2505    """
2506    def __init__(self):
2507        self.idx = 0
2508    def find(self, offset):
2509        idx = self.idx
2510        while True:
2511            item = self[idx]
2512            if offset in item:
2513                break
2514            idx += 1
2515            if idx == len(self):
2516                idx = 0
2517            if idx == self.idx:
2518                # End of File
2519                return None
2520        self.idx = idx
2521        return item
2522
2523#---------------------------------------------
2524# zipfile compatible TarFile class
2525#---------------------------------------------
# Compression constants accepted by TarFileCompat. Each trailing comment
# names the zipfile constant that the value stands in for.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class. All work is delegated to a TarFile object that is
       stored in the attribute `tarfile'.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        # Pick the open method that belongs to the compression constant.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            # Give every member the attribute names used by zipfile.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        # Names of the members returned by infolist().
        return [m.name for m in self.infolist()]
    def infolist(self):
        # Only members whose type is one of REGULAR_TYPES are listed.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        # Nothing is tested, None is returned in any case.
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted, but not used.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Build a TarInfo object from the name and date of zinfo
        # and add the string as the data of the new member.
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
2575#class TarFileCompat
2576
2577#--------------------
2578# exported functions
2579#--------------------
def is_tarfile(name):
    """Tell whether name can be opened as a tar archive.

       Returns True when opening and closing it succeeds and False
       when that raises TarError; other exceptions propagate.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2590
# Keep a handle on the previous binding of open (presumably the
# builtin, going by the name -- confirm) before it is rebound below.
# The order of these two statements matters.
bltn_open = open
# From here on the module-level name open refers to TarFile.open,
# which is also what is_tarfile() above resolves at call time.
open = TarFile.open
2593