sdist.py revision 3ce77fd05ed00168f618b63401d770ccc4f04b09
1"""distutils.command.sdist
2
3Implements the Distutils 'sdist' command (create a source distribution)."""
4
5# created 1999/09/22, Greg Ward
6
7__revision__ = "$Id$"
8
9import sys, os, string, re
10import fnmatch
11from types import *
12from glob import glob
13from shutil import rmtree
14from distutils.core import Command
15from distutils.util import newer
16from distutils.text_file import TextFile
17from distutils.errors import DistutilsExecError
18
19
20class sdist (Command):
21
    # One-line description of what this command does.
    description = "create a source distribution (tarball, zip file, etc.)"

    # Command-line options, one 3-tuple each: (long option name, short
    # option letter or None, help text).
    # NOTE(review): the trailing '=' on some long names presumably marks
    # options that take a value -- confirm against the option parser.
    user_options = [
        ('template=', 't',
         "name of manifest template file [default: MANIFEST.in]"),
        ('manifest=', 'm',
         "name of manifest file [default: MANIFEST]"),
        ('use-defaults', None,
         "include the default file set in the manifest "
         "[default; disable with --no-defaults]"),
        ('manifest-only', None,
         "just regenerate the manifest and then stop"),
        ('force-manifest', None,
         "forcibly regenerate the manifest and carry on as usual"),

        ('formats=', None,
         "formats for source distribution (tar, ztar, gztar, or zip)"),
        ('list-only', 'l',
         "just list files that would be distributed"),
        ('keep-tree', 'k',
         "keep the distribution tree around after creating " +
         "archive file(s)"),
        ]
    # Pairs 'use-defaults' with 'no-defaults', the switch that the
    # 'use-defaults' help text above says disables it.
    negative_opts = {'use-defaults': 'no-defaults'}

    # Archive format used when no '--formats' option is given; looked up
    # by 'os.name' in 'finalize_options()'.
    default_format = { 'posix': 'gztar',
                       'nt': 'zip' }

    # NOTE(review): this pattern is not referenced by any other code
    # visible in this file -- possibly a leftover; confirm before removing.
    exclude_re = re.compile (r'\s*!\s*(\S+)') # for manifest lines
51
52
53    def initialize_options (self):
54        # 'template' and 'manifest' are, respectively, the names of
55        # the manifest template and manifest file.
56        self.template = None
57        self.manifest = None
58
59        # 'use_defaults': if true, we will include the default file set
60        # in the manifest
61        self.use_defaults = 1
62
63        self.manifest_only = 0
64        self.force_manifest = 0
65
66        self.formats = None
67        self.list_only = 0
68        self.keep_tree = 0
69
70
71    def finalize_options (self):
72        if self.manifest is None:
73            self.manifest = "MANIFEST"
74        if self.template is None:
75            self.template = "MANIFEST.in"
76
77        if self.formats is None:
78            try:
79                self.formats = [self.default_format[os.name]]
80            except KeyError:
81                raise DistutilsPlatformError, \
82                      "don't know how to build source distributions on " + \
83                      "%s platform" % os.name
84        elif type (self.formats) is StringType:
85            self.formats = string.split (self.formats, ',')
86
87
88    def run (self):
89
90        # 'files' is the list of files that will make up the manifest
91        self.files = []
92
93        # Ensure that all required meta-data is given; warn if not (but
94        # don't die, it's not *that* serious!)
95        self.check_metadata ()
96
97        # Do whatever it takes to get the list of files to process
98        # (process the manifest template, read an existing manifest,
99        # whatever).  File list is put into 'self.files'.
100        self.get_file_list ()
101
102        # If user just wanted us to regenerate the manifest, stop now.
103        if self.manifest_only:
104            return
105
106        # Otherwise, go ahead and create the source distribution tarball,
107        # or zipfile, or whatever.
108        self.make_distribution ()
109
110
111    def check_metadata (self):
112
113        dist = self.distribution
114
115        missing = []
116        for attr in ('name', 'version', 'url'):
117            if not (hasattr (dist, attr) and getattr (dist, attr)):
118                missing.append (attr)
119
120        if missing:
121            self.warn ("missing required meta-data: " +
122                       string.join (missing, ", "))
123
124        if dist.author:
125            if not dist.author_email:
126                self.warn ("missing meta-data: if 'author' supplied, " +
127                           "'author_email' must be supplied too")
128        elif dist.maintainer:
129            if not dist.maintainer_email:
130                self.warn ("missing meta-data: if 'maintainer' supplied, " +
131                           "'maintainer_email' must be supplied too")
132        else:
133            self.warn ("missing meta-data: either (author and author_email) " +
134                       "or (maintainer and maintainer_email) " +
135                       "must be supplied")
136
137    # check_metadata ()
138
139
140    def get_file_list (self):
141        """Figure out the list of files to include in the source
142           distribution, and put it in 'self.files'.  This might
143           involve reading the manifest template (and writing the
144           manifest), or just reading the manifest, or just using
145           the default file set -- it all depends on the user's
146           options and the state of the filesystem."""
147
148
149        template_exists = os.path.isfile (self.template)
150        if template_exists:
151            template_newer = newer (self.template, self.manifest)
152
153        # Regenerate the manifest if necessary (or if explicitly told to)
154        if ((template_exists and template_newer) or
155            self.force_manifest or
156            self.manifest_only):
157
158            if not template_exists:
159                self.warn (("manifest template '%s' does not exist " +
160                            "(using default file list)") %
161                           self.template)
162
163            # Add default file set to 'files'
164            if self.use_defaults:
165                self.find_defaults ()
166
167            # Read manifest template if it exists
168            if template_exists:
169                self.read_template ()
170
171            # File list now complete -- sort it so that higher-level files
172            # come first
173            sortable_files = map (os.path.split, self.files)
174            sortable_files.sort ()
175            self.files = []
176            for sort_tuple in sortable_files:
177                self.files.append (apply (os.path.join, sort_tuple))
178
179            # Remove duplicates from the file list
180            for i in range (len(self.files)-1, 0, -1):
181                if self.files[i] == self.files[i-1]:
182                    del self.files[i]
183
184            # And write complete file list (including default file set) to
185            # the manifest.
186            self.write_manifest ()
187
188        # Don't regenerate the manifest, just read it in.
189        else:
190            self.read_manifest ()
191
192    # get_file_list ()
193
194
195    def find_defaults (self):
196
197        standards = [('README', 'README.txt'), 'setup.py']
198        for fn in standards:
199            if type (fn) is TupleType:
200                alts = fn
201                got_it = 0
202                for fn in alts:
203                    if os.path.exists (fn):
204                        got_it = 1
205                        self.files.append (fn)
206                        break
207
208                if not got_it:
209                    self.warn ("standard file not found: should have one of " +
210                               string.join (alts, ', '))
211            else:
212                if os.path.exists (fn):
213                    self.files.append (fn)
214                else:
215                    self.warn ("standard file '%s' not found" % fn)
216
217        optional = ['test/test*.py']
218        for pattern in optional:
219            files = filter (os.path.isfile, glob (pattern))
220            if files:
221                self.files.extend (files)
222
223        if self.distribution.packages or self.distribution.py_modules:
224            build_py = self.find_peer ('build_py')
225            build_py.ensure_ready ()
226            self.files.extend (build_py.get_source_files ())
227
228        if self.distribution.ext_modules:
229            build_ext = self.find_peer ('build_ext')
230            build_ext.ensure_ready ()
231            self.files.extend (build_ext.get_source_files ())
232
233
234
235    def search_dir (self, dir, pattern=None):
236        """Recursively find files under 'dir' matching 'pattern' (a string
237           containing a Unix-style glob pattern).  If 'pattern' is None,
238           find all files under 'dir'.  Return the list of found
239           filenames."""
240
241        allfiles = findall (dir)
242        if pattern is None:
243            return allfiles
244
245        pattern_re = translate_pattern (pattern)
246        files = []
247        for file in allfiles:
248            if pattern_re.match (os.path.basename (file)):
249                files.append (file)
250
251        return files
252
253    # search_dir ()
254
255
256    def exclude_pattern (self, pattern):
257        """Remove filenames from 'self.files' that match 'pattern'."""
258        print "exclude_pattern: pattern=%s" % pattern
259        pattern_re = translate_pattern (pattern)
260        for i in range (len (self.files)-1, -1, -1):
261            if pattern_re.match (self.files[i]):
262                print "removing %s" % self.files[i]
263                del self.files[i]
264
265
266    def recursive_exclude_pattern (self, dir, pattern=None):
267        """Remove filenames from 'self.files' that are under 'dir'
268           and whose basenames match 'pattern'."""
269
270        print "recursive_exclude_pattern: dir=%s, pattern=%s" % (dir, pattern)
271        if pattern is None:
272            pattern_re = None
273        else:
274            pattern_re = translate_pattern (pattern)
275
276        for i in range (len (self.files)-1, -1, -1):
277            (cur_dir, cur_base) = os.path.split (self.files[i])
278            if (cur_dir == dir and
279                (pattern_re is None or pattern_re.match (cur_base))):
280                print "removing %s" % self.files[i]
281                del self.files[i]
282
283
284    def read_template (self):
285        """Read and parse the manifest template file named by
286           'self.template' (usually "MANIFEST.in").  Process all file
287           specifications (include and exclude) in the manifest template
288           and add the resulting filenames to 'self.files'."""
289
290        assert self.files is not None and type (self.files) is ListType
291
292        template = TextFile (self.template,
293                             strip_comments=1,
294                             skip_blanks=1,
295                             join_lines=1,
296                             lstrip_ws=1,
297                             rstrip_ws=1,
298                             collapse_ws=1)
299
300        all_files = findall ()
301
302        while 1:
303
304            line = template.readline()
305            if line is None:            # end of file
306                break
307
308            words = string.split (line)
309            action = words[0]
310
311            # First, check that the right number of words are present
312            # for the given action (which is the first word)
313            if action in ('include','exclude',
314                          'global-include','global-exclude'):
315                if len (words) != 2:
316                    template.warn \
317                        ("invalid manifest template line: " +
318                         "'%s' expects a single <pattern>" %
319                         action)
320                    continue
321
322                pattern = words[1]
323
324            elif action in ('recursive-include','recursive-exclude'):
325                if len (words) != 3:
326                    template.warn \
327                        ("invalid manifest template line: " +
328                         "'%s' expects <dir> <pattern>" %
329                         action)
330                    continue
331
332                (dir, pattern) = words[1:3]
333
334            elif action in ('graft','prune'):
335                if len (words) != 2:
336                    template.warn \
337                        ("invalid manifest template line: " +
338                         "'%s' expects a single <dir_pattern>" %
339                         action)
340                    continue
341
342                dir_pattern = words[1]
343
344            else:
345                template.warn ("invalid manifest template line: " +
346                               "unknown action '%s'" % action)
347                continue
348
349            # OK, now we know that the action is valid and we have the
350            # right number of words on the line for that action -- so we
351            # can proceed with minimal error-checking.  Also, we have
352            # defined either 'patter', 'dir' and 'pattern', or
353            # 'dir_pattern' -- so we don't have to spend any time digging
354            # stuff up out of 'words'.
355
356            if action == 'include':
357                print "include", pattern
358                files = select_pattern (all_files, pattern, anchor=1)
359                if not files:
360                    template.warn ("no files found matching '%s'" % pattern)
361                else:
362                    self.files.extend (files)
363
364            elif action == 'exclude':
365                print "exclude", pattern
366                num = exclude_pattern (self.files, pattern, anchor=1)
367                if num == 0:
368                    template.warn \
369                        ("no previously-included files found matching '%s'" %
370                         pattern)
371
372            elif action == 'global-include':
373                print "global-include", pattern
374                files = select_pattern (all_files, pattern, anchor=0)
375                if not files:
376                    template.warn (("no files found matching '%s' " +
377                                    "anywhere in distribution") %
378                                   pattern)
379                else:
380                    self.files.extend (files)
381
382            elif action == 'global-exclude':
383                print "global-exclude", pattern
384                num = exclude_pattern (self.files, pattern, anchor=0)
385                if num == 0:
386                    template.warn \
387                        (("no previously-included files matching '%s' " +
388                          "found anywhere in distribution") %
389                         pattern)
390
391            elif action == 'recursive-include':
392                print "recursive-include", dir, pattern
393                files = select_pattern (all_files, pattern, prefix=dir)
394                if not files:
395                    template.warn (("no files found matching '%s' " +
396                                    "under directory '%s'") %
397                                   (pattern, dir))
398                else:
399                    self.files.extend (files)
400
401            elif action == 'recursive-exclude':
402                print "recursive-exclude", dir, pattern
403                num = exclude_pattern (self.files, pattern, prefix=dir)
404                if num == 0:
405                    template.warn \
406                        (("no previously-included files matching '%s' " +
407                          "found under directory '%s'") %
408                         (pattern, dir))
409
410            elif action == 'graft':
411                print "graft", dir_pattern
412                files = select_pattern (all_files, None, prefix=dir_pattern)
413                if not files:
414                    template.warn ("no directories found matching '%s'" %
415                                   dir_pattern)
416                else:
417                    self.files.extend (files)
418
419            elif action == 'prune':
420                print "prune", dir_pattern
421                num = exclude_pattern (self.files, None, prefix=dir_pattern)
422                if num == 0:
423                    template.warn \
424                        (("no previously-included directories found " +
425                          "matching '%s'") %
426                         dir_pattern)
427            else:
428                raise RuntimeError, \
429                      "this cannot happen: invalid action '%s'" % action
430
431        # while loop over lines of template file
432
433    # read_template ()
434
435
436    def write_manifest (self):
437        """Write the file list in 'self.files' (presumably as filled in
438           by 'find_defaults()' and 'read_template()') to the manifest file
439           named by 'self.manifest'."""
440
441        manifest = open (self.manifest, "w")
442        for fn in self.files:
443            manifest.write (fn + '\n')
444        manifest.close ()
445
446    # write_manifest ()
447
448
449    def read_manifest (self):
450        """Read the manifest file (named by 'self.manifest') and use
451           it to fill in 'self.files', the list of files to include
452           in the source distribution."""
453
454        manifest = open (self.manifest)
455        while 1:
456            line = manifest.readline ()
457            if line == '':              # end of file
458                break
459            if line[-1] == '\n':
460                line = line[0:-1]
461            self.files.append (line)
462
463    # read_manifest ()
464
465
466
467    def make_release_tree (self, base_dir, files):
468
469        # First get the list of directories to create
470        need_dir = {}
471        for file in files:
472            need_dir[os.path.join (base_dir, os.path.dirname (file))] = 1
473        need_dirs = need_dir.keys()
474        need_dirs.sort()
475
476        # Now create them
477        for dir in need_dirs:
478            self.mkpath (dir)
479
480        # And walk over the list of files, either making a hard link (if
481        # os.link exists) to each one that doesn't already exist in its
482        # corresponding location under 'base_dir', or copying each file
483        # that's out-of-date in 'base_dir'.  (Usually, all files will be
484        # out-of-date, because by default we blow away 'base_dir' when
485        # we're done making the distribution archives.)
486
487        try:
488            link = os.link
489            msg = "making hard links in %s..." % base_dir
490        except AttributeError:
491            link = 0
492            msg = "copying files to %s..." % base_dir
493
494        self.announce (msg)
495        for file in files:
496            dest = os.path.join (base_dir, file)
497            if link:
498                if not os.path.exists (dest):
499                    self.execute (os.link, (file, dest),
500                                  "linking %s -> %s" % (file, dest))
501            else:
502                self.copy_file (file, dest)
503
504    # make_release_tree ()
505
506
507    def nuke_release_tree (self, base_dir):
508        try:
509            self.execute (rmtree, (base_dir,),
510                          "removing %s" % base_dir)
511        except (IOError, OSError), exc:
512            if exc.filename:
513                msg = "error removing %s: %s (%s)" % \
514                       (base_dir, exc.strerror, exc.filename)
515            else:
516                msg = "error removing %s: %s" % (base_dir, exc.strerror)
517            self.warn (msg)
518
519
520    def make_tarball (self, base_dir, compress="gzip"):
521
522        # XXX GNU tar 1.13 has a nifty option to add a prefix directory.
523        # It's pretty new, though, so we certainly can't require it --
524        # but it would be nice to take advantage of it to skip the
525        # "create a tree of hardlinks" step!  (Would also be nice to
526        # detect GNU tar to use its 'z' option and save a step.)
527
528        if compress is not None and compress not in ('gzip', 'compress'):
529            raise ValueError, \
530                  "if given, 'compress' must be 'gzip' or 'compress'"
531
532        archive_name = base_dir + ".tar"
533        self.spawn (["tar", "-cf", archive_name, base_dir])
534
535        if compress:
536            self.spawn ([compress, archive_name])
537
538
539    def make_zipfile (self, base_dir):
540
541        # This initially assumed the Unix 'zip' utility -- but
542        # apparently InfoZIP's zip.exe works the same under Windows, so
543        # no changes needed!
544
545        try:
546            self.spawn (["zip", "-r", base_dir + ".zip", base_dir])
547        except DistutilsExecError:
548
549            # XXX really should distinguish between "couldn't find
550            # external 'zip' command" and "zip failed" -- shouldn't try
551            # again in the latter case.  (I think fixing this will
552            # require some cooperation from the spawn module -- perhaps
553            # a utility function to search the path, so we can fallback
554            # on zipfile.py without the failed spawn.)
555            try:
556                import zipfile
557            except ImportError:
558                raise DistutilsExecError, \
559                      ("unable to create zip file '%s.zip': " +
560                       "could neither find a standalone zip utility nor " +
561                       "import the 'zipfile' module") % base_dir
562
563            z = zipfile.ZipFile (base_dir + ".zip", "wb",
564                                 compression=zipfile.ZIP_DEFLATED)
565
566            def visit (z, dirname, names):
567                for name in names:
568                    path = os.path.join (dirname, name)
569                    if os.path.isfile (path):
570                        z.write (path, path)
571
572            os.path.walk (base_dir, visit, z)
573            z.close()
574
575
576    def make_distribution (self):
577
578        # Don't warn about missing meta-data here -- should be done
579        # elsewhere.
580        name = self.distribution.name or "UNKNOWN"
581        version = self.distribution.version
582
583        if version:
584            base_dir = "%s-%s" % (name, version)
585        else:
586            base_dir = name
587
588        # Remove any files that match "base_dir" from the fileset -- we
589        # don't want to go distributing the distribution inside itself!
590        self.exclude_pattern (base_dir + "*")
591
592        self.make_release_tree (base_dir, self.files)
593        for fmt in self.formats:
594            if fmt == 'gztar':
595                self.make_tarball (base_dir, compress='gzip')
596            elif fmt == 'ztar':
597                self.make_tarball (base_dir, compress='compress')
598            elif fmt == 'tar':
599                self.make_tarball (base_dir, compress=None)
600            elif fmt == 'zip':
601                self.make_zipfile (base_dir)
602
603        if not self.keep_tree:
604            self.nuke_release_tree (base_dir)
605
# class sdist
607
608
609# ----------------------------------------------------------------------
610# Utility functions
611
def findall (dir = os.curdir):
    """Find all files under 'dir' and return the list of their names.

       Only names for which 'os.path.isfile()' is true are returned;
       directories are descended into but not listed themselves, and
       symbolic links to directories are neither followed nor listed.
       Every name starts with 'dir', except when 'dir' is the current
       directory ('os.curdir'), in which case names are left bare."""

    found = []
    pending = [dir]                     # directories still to be scanned

    while pending:
        current = pending.pop ()

        for name in os.listdir (current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join (current, name)
            else:
                fullname = name

            # Directories must stay out of the result: callers treat
            # every entry as a file to be linked or copied.
            if os.path.isfile (fullname):
                found.append (fullname)
            elif os.path.isdir (fullname) and not os.path.islink (fullname):
                pending.append (fullname)

    return found
635
636
637def select_pattern (files, pattern, anchor=1, prefix=None):
638    """Select strings (presumably filenames) from 'files' that match
639       'pattern', a Unix-style wildcard (glob) pattern.  Patterns are not
640       quite the same as implemented by the 'fnmatch' module: '*' and '?'
641       match non-special characters, where "special" is platform-dependent:
642       slash on Unix, colon, slash, and backslash on DOS/Windows, and colon
643       on Mac OS.
644
645       If 'anchor' is true (the default), then the pattern match is more
646       stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
647       'anchor' is false, both of these will match.
648
649       If 'prefix' is supplied, then only filenames starting with 'prefix'
650       (itself a pattern) and ending with 'pattern', with anything in
651       between them, will match.  'anchor' is ignored in this case.
652
653       Return the list of matching strings, possibly empty."""
654
655    matches = []
656    pattern_re = translate_pattern (pattern, anchor, prefix)
657    print "select_pattern: applying re %s" % pattern_re.pattern
658    for name in files:
659        if pattern_re.search (name):
660            matches.append (name)
661            print " adding", name
662
663    return matches
664
665# select_pattern ()
666
667
668def exclude_pattern (files, pattern, anchor=1, prefix=None):
669
670    pattern_re = translate_pattern (pattern, anchor, prefix)
671    print "exclude_pattern: applying re %s" % pattern_re.pattern
672    for i in range (len(files)-1, -1, -1):
673        if pattern_re.search (files[i]):
674            print " removing", files[i]
675            del files[i]
676
677# exclude_pattern ()
678
679
def _restrict_dot (match):
    """Substitution callback for 'glob_to_re()': hand back a
       backslash-escaped character unchanged, and replace a bare '.'
       with a character class that excludes slash."""
    escaped = match.group (1)
    if escaped is not None:
        return escaped
    return '[^/]'


def glob_to_re (pattern):
    """Translate a shell-like glob pattern to a regular expression;
       return a string containing the regex.  Differs from
       'fnmatch.translate()' in that '*' and '?' do not match "special
       characters" (which are platform-specific)."""
    pattern_re = fnmatch.translate (pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    #
    # The RE is consumed one token at a time -- either a backslash plus
    # the character it escapes, or a bare dot -- so that every bare dot
    # is seen, including one that directly follows another wildcard (as
    # in "??" or "?*") or an escaped backslash.
    pattern_re = re.sub (r'(\\.)|\.', _restrict_dot, pattern_re)
    return pattern_re
696
697# glob_to_re ()
698
699
def translate_pattern (pattern, anchor=1, prefix=None):
    """Turn a shell-like wildcard pattern into a compiled regular
       expression and return it.

       An empty or None 'pattern' contributes nothing to the expression.
       Without 'prefix', the expression is tied to the start of the
       string when 'anchor' is true and left floating otherwise.  With
       'prefix' (itself a wildcard pattern), the expression is always
       tied to the start and reads: the prefix, a path separator, then
       anything, then 'pattern'; 'anchor' plays no part."""

    if pattern:
        tail_re = glob_to_re (pattern)
    else:
        tail_re = ''

    if prefix is None:
        # no prefix -- respect anchor flag
        if anchor:
            tail_re = "^" + tail_re
        return re.compile (tail_re)

    # Drop the last character of the translated prefix (the trailing '$'
    # that ends the expression) so more can be appended after it.
    # NOTE(review): 'os.path.join()' supplies the separator here, which
    # is only known to yield a sensible regex where that is '/' --
    # confirm before relying on this on other platforms.
    head_re = glob_to_re (prefix)
    head_re = head_re[0:-1]
    return re.compile ("^" + os.path.join (head_re, ".*" + tail_re))
717
718# translate_pattern ()
719