1"""Utility functions for copying and archiving files and directory trees.
2
3XXX The functions here don't copy the resource fork or other metadata on Mac.
4
5"""
6
7import os
8import sys
9import stat
10from os.path import abspath
11import fnmatch
12import collections
13import errno
14
15try:
16    from pwd import getpwnam
17except ImportError:
18    getpwnam = None
19
20try:
21    from grp import getgrnam
22except ImportError:
23    getgrnam = None
24
# Public names of this module (what ``from <module> import *`` exports).
# Helpers whose names start with an underscore are intentionally left out.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]
30
class Error(EnvironmentError):
    """Error raised by this module's copy and move operations.

    copyfile() and move() raise it with a message string; copytree() raises
    it with a list of (srcname, dstname, reason) tuples as its argument.
    """
33
class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe).

    In this module, copyfile() raises it when the source or the destination
    is a FIFO.
    """
37
class ExecError(EnvironmentError):
    """Raised when a command could not be executed.

    In this module, _call_external_zip() raises it when spawning the
    external ``zip`` program fails.
    """
40
# Probe for the WindowsError builtin.  Where the name is not defined, bind it
# to None so that later code (see copytree) can guard with
# "WindowsError is not None" before using it in isinstance().
try:
    WindowsError
except NameError:
    WindowsError = None
45
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most 'length' (as passed to
    fsrc.read()) until a read returns an empty result.  Neither object is
    closed or repositioned by this function.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
53
54def _samefile(src, dst):
55    # Macintosh, Unix.
56    if hasattr(os.path, 'samefile'):
57        try:
58            return os.path.samefile(src, dst)
59        except OSError:
60            return False
61
62    # All other platforms: check for same pathname.
63    return (os.path.normcase(os.path.abspath(src)) ==
64            os.path.normcase(os.path.abspath(dst)))
65
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    Raises Error if both names refer to the same file, and SpecialFileError
    if either one is a named pipe.  dst is opened for binary writing, so an
    existing destination is overwritten.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            info = os.stat(path)
        except OSError:
            # File most likely does not exist; nothing to check.
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    with open(src, 'rb') as source:
        with open(dst, 'wb') as target:
            copyfileobj(source, target)
85
def copymode(src, dst):
    """Copy the permission bits of src onto dst.

    Does nothing on platforms where os.chmod is unavailable.
    """
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
92
def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece is only copied when the platform provides the corresponding
    os function (utime, chmod, chflags).  An OSError from os.chflags() whose
    errno is EOPNOTSUPP or ENOTSUP is ignored; any other error propagates.
    """
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Not every errno module defines both names, so look them up
            # defensively before comparing.
            unsupported = [getattr(errno, err)
                           for err in ('EOPNOTSUPP', 'ENOTSUP')
                           if hasattr(errno, err)]
            if why.errno not in unsupported:
                raise
110
def copy(src, dst):
    """Copy file data and permission bits, like "cp src dst".

    If dst is an existing directory, the file is copied into it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
121
def copy2(src, dst):
    """Copy file data and all stat info, like "cp -p src dst".

    If dst is an existing directory, the file is copied into it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
132
def ignore_patterns(*patterns):
    """Build a callable suitable for the 'ignore' parameter of copytree().

    'patterns' are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them.
    """
    def _ignore_patterns(path, names):
        matched = set()
        for glob in patterns:
            matched.update(fnmatch.filter(names, glob))
        return matched
    return _ignore_patterns
144
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons,
    each a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files.  Error must be listed before
        # EnvironmentError because it is a subclass of it.
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
209
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an except block below, so a bare
            # raise re-raises the error currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Unknown type: treat as a non-directory so os.remove() is tried
            # and any failure is reported through onerror.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
257
258
259def _basename(path):
260    # A basename() variant which first strips the trailing slash, if present.
261    # Thus we always get the last component of the path, even for directories.
262    return os.path.basename(path.rstrip(os.path.sep))
263
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist; Error is raised if it does.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, src is copied to
    the destination (copytree() for directories, copy2() for files) and then
    removed.  Error is raised when a directory would be moved into itself.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
303
304def _destinsrc(src, dst):
305    src = abspath(src)
306    dst = abspath(dst)
307    if not src.endswith(os.path.sep):
308        src += os.path.sep
309    if not dst.endswith(os.path.sep):
310        dst += os.path.sep
311    return dst.startswith(src)
312
313def _get_gid(name):
314    """Returns a gid, given a group name."""
315    if getgrnam is None or name is None:
316        return None
317    try:
318        result = getgrnam(name)
319    except KeyError:
320        result = None
321    if result is not None:
322        return result[2]
323    return None
324
325def _get_uid(name):
326    """Returns an uid, given a user name."""
327    if getpwnam is None or name is None:
328        return None
329    try:
330        result = getpwnam(name)
331    except KeyError:
332        result = None
333    if result is not None:
334        return result[2]
335    return None
336
337def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
338                  owner=None, group=None, logger=None):
339    """Create a (possibly compressed) tar file from all the files under
340    'base_dir'.
341
342    'compress' must be "gzip" (the default), "bzip2", or None.
343
344    'owner' and 'group' can be used to define an owner and a group for the
345    archive that is being built. If not provided, the current owner and group
346    will be used.
347
348    The output tar file will be named 'base_name' +  ".tar", possibly plus
349    the appropriate compression extension (".gz", or ".bz2").
350
351    Returns the output filename.
352    """
353    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
354    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}
355
356    # flags for compression program, each element of list will be an argument
357    if compress is not None and compress not in compress_ext.keys():
358        raise ValueError, \
359              ("bad value for 'compress': must be None, 'gzip' or 'bzip2'")
360
361    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
362    archive_dir = os.path.dirname(archive_name)
363
364    if not os.path.exists(archive_dir):
365        if logger is not None:
366            logger.info("creating %s", archive_dir)
367        if not dry_run:
368            os.makedirs(archive_dir)
369
370
371    # creating the tarball
372    import tarfile  # late import so Python build itself doesn't break
373
374    if logger is not None:
375        logger.info('Creating tar archive')
376
377    uid = _get_uid(owner)
378    gid = _get_gid(group)
379
380    def _set_uid_gid(tarinfo):
381        if gid is not None:
382            tarinfo.gid = gid
383            tarinfo.gname = group
384        if uid is not None:
385            tarinfo.uid = uid
386            tarinfo.uname = owner
387        return tarinfo
388
389    if not dry_run:
390        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
391        try:
392            tar.add(base_dir, filter=_set_uid_gid)
393        finally:
394            tar.close()
395
396    return archive_name
397
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning an external "zip".

    The command is run recursively ("-r"), and quietly ("-rq") unless
    'verbose' is true.  'dry_run' is passed through to distutils' spawn().
    Raises ExecError if spawning the command fails.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(
            ("unable to create zip file '%s': "
             "could neither import the 'zipfile' module nor "
             "find a standalone zip utility") % zip_filename)
415
416def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
417    """Create a zip file from all the files under 'base_dir'.
418
419    The output zip file will be named 'base_name' + ".zip".  Uses either the
420    "zipfile" Python module (if available) or the InfoZIP "zip" utility
421    (if installed and found on the default search path).  If neither tool is
422    available, raises ExecError.  Returns the name of the output zip
423    file.
424    """
425    zip_filename = base_name + ".zip"
426    archive_dir = os.path.dirname(base_name)
427
428    if not os.path.exists(archive_dir):
429        if logger is not None:
430            logger.info("creating %s", archive_dir)
431        if not dry_run:
432            os.makedirs(archive_dir)
433
434    # If zipfile module is not available, try spawning an external 'zip'
435    # command.
436    try:
437        import zipfile
438    except ImportError:
439        zipfile = None
440
441    if zipfile is None:
442        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
443    else:
444        if logger is not None:
445            logger.info("creating '%s' and adding '%s' to it",
446                        zip_filename, base_dir)
447
448        if not dry_run:
449            zip = zipfile.ZipFile(zip_filename, "w",
450                                  compression=zipfile.ZIP_DEFLATED)
451
452            for dirpath, dirnames, filenames in os.walk(base_dir):
453                for name in filenames:
454                    path = os.path.normpath(os.path.join(dirpath, name))
455                    if os.path.isfile(path):
456                        zip.write(path, path)
457                        if logger is not None:
458                            logger.info("adding '%s'", path)
459            zip.close()
460
461    return zip_filename
462
# Registry of archive formats, keyed by format name.  Each value is a tuple
#   (function, extra_args, description)
# where function creates the archive, extra_args is a list of (name, value)
# pairs passed to it as keyword arguments, and description is the text
# reported by get_archive_formats().  make_archive() looks formats up here;
# register_archive_format() / unregister_archive_format() modify it.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [],"ZIP file")
    }
469
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
479
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if any of its elements is not a 2-item tuple or list.
    An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    # The callable() builtin avoids depending on collections.Callable,
    # which newer Python versions no longer provide.
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
500
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
503
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar" (or any name added with register_archive_format()).  An
    unknown format raises ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'verbose' is accepted but not forwarded to the archiver.  'dry_run' and
    'logger' are passed through to it.
    """
    # Validate the format before touching the working directory, so an
    # unknown format can never leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name against the original cwd before leaving it.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
557