sdist.py revision 4a7319ca286e78ae9ddf9f86a50eee3eab813855
1"""distutils.command.sdist
2
3Implements the Distutils 'sdist' command (create a source distribution)."""
4
5# created 1999/09/22, Greg Ward
6
7__revision__ = "$Id$"
8
9import sys, os, string, re
10import fnmatch
11from types import *
12from glob import glob
13from distutils.core import Command
14from distutils.util import \
15     convert_path, create_tree, remove_tree, newer, write_file, \
16     check_archive_formats, ARCHIVE_FORMATS
17from distutils.text_file import TextFile
18from distutils.errors import DistutilsExecError, DistutilsOptionError
19
20
21class sdist (Command):
22
    # One-line summary of what this command does.
    description = "create a source distribution (tarball, zip file, etc.)"

    # Command-line options understood by this command.  Each entry is a
    # (long option, short option or None, help text) tuple.
    # NOTE(review): a long name ending in '=' presumably marks an option
    # that takes a value -- confirm against the option parser.
    user_options = [
        ('template=', 't',
         "name of manifest template file [default: MANIFEST.in]"),
        ('manifest=', 'm',
         "name of manifest file [default: MANIFEST]"),
        ('use-defaults', None,
         "include the default file set in the manifest "
         "[default; disable with --no-defaults]"),
        ('manifest-only', 'o',
         "just regenerate the manifest and then stop "
         "(implies --force-manifest)"),
        ('force-manifest', 'f',
         "forcibly regenerate the manifest and carry on as usual"),
        ('formats=', None,
         "formats for source distribution"),
        ('keep-tree', 'k',
         "keep the distribution tree around after creating " +
         "archive file(s)"),
        ]
44
45
46    # XXX ugh: this has to precede the 'help_options' list, because
47    # it is mentioned there -- also, this is not a method, even though
48    # it's defined in a class: double-ugh!
49    def show_formats ():
50        """Print all possible values for the 'formats' option -- used by
51        the "--help-formats" command-line option.
52        """
53	from distutils.fancy_getopt import FancyGetopt
54	formats=[]
55	for format in ARCHIVE_FORMATS.keys():
56	    formats.append(("formats="+format,None,ARCHIVE_FORMATS[format][2]))
57	formats.sort()
58	pretty_printer = FancyGetopt(formats)
59	pretty_printer.print_help(
60            "List of available source distribution formats:")
61
    # Extra help options: each entry is (long option, short option or None,
    # help text, callable).  'show_formats' is referenced by value here,
    # which is why it has to be defined before this list.
    help_options = [
        ('help-formats', None,
         "lists available distribution formats", show_formats),
        ]

    # Maps an option to the name of its negated form ('use-defaults' is
    # switched off with 'no-defaults', as its help text advertises).
    negative_opts = {'use-defaults': 'no-defaults'}

    # Archive format used by finalize_options() when 'formats' was not
    # given, keyed by os.name.
    default_format = { 'posix': 'gztar',
                       'nt': 'zip' }
71
72
73    def initialize_options (self):
74        # 'template' and 'manifest' are, respectively, the names of
75        # the manifest template and manifest file.
76        self.template = None
77        self.manifest = None
78
79        # 'use_defaults': if true, we will include the default file set
80        # in the manifest
81        self.use_defaults = 1
82
83        self.manifest_only = 0
84        self.force_manifest = 0
85
86        self.formats = None
87        self.keep_tree = 0
88
89        self.archive_files = None
90
91
92    def finalize_options (self):
93        if self.manifest is None:
94            self.manifest = "MANIFEST"
95        if self.template is None:
96            self.template = "MANIFEST.in"
97
98        self.ensure_string_list('formats')
99        if self.formats is None:
100            try:
101                self.formats = [self.default_format[os.name]]
102            except KeyError:
103                raise DistutilsPlatformError, \
104                      "don't know how to create source distributions " + \
105                      "on platform %s" % os.name
106
107        bad_format = check_archive_formats (self.formats)
108        if bad_format:
109            raise DistutilsOptionError, \
110                  "unknown archive format '%s'" % bad_format
111
112
113    def run (self):
114
115        # 'files' is the list of files that will make up the manifest
116        self.files = []
117
118        # Ensure that all required meta-data is given; warn if not (but
119        # don't die, it's not *that* serious!)
120        self.check_metadata ()
121
122        # Do whatever it takes to get the list of files to process
123        # (process the manifest template, read an existing manifest,
124        # whatever).  File list is put into 'self.files'.
125        self.get_file_list ()
126
127        # If user just wanted us to regenerate the manifest, stop now.
128        if self.manifest_only:
129            return
130
131        # Otherwise, go ahead and create the source distribution tarball,
132        # or zipfile, or whatever.
133        self.make_distribution ()
134
135
136    def check_metadata (self):
137        """Ensure that all required elements of meta-data (name, version,
138        URL, (author and author_email) or (maintainer and
139        maintainer_email)) are supplied by the Distribution object; warn if
140        any are missing.
141        """
142        metadata = self.distribution.metadata
143
144        missing = []
145        for attr in ('name', 'version', 'url'):
146            if not (hasattr (metadata, attr) and getattr (metadata, attr)):
147                missing.append (attr)
148
149        if missing:
150            self.warn ("missing required meta-data: " +
151                       string.join (missing, ", "))
152
153        if metadata.author:
154            if not metadata.author_email:
155                self.warn ("missing meta-data: if 'author' supplied, " +
156                           "'author_email' must be supplied too")
157        elif metadata.maintainer:
158            if not metadata.maintainer_email:
159                self.warn ("missing meta-data: if 'maintainer' supplied, " +
160                           "'maintainer_email' must be supplied too")
161        else:
162            self.warn ("missing meta-data: either (author and author_email) " +
163                       "or (maintainer and maintainer_email) " +
164                       "must be supplied")
165
166    # check_metadata ()
167
168
169    def get_file_list (self):
170        """Figure out the list of files to include in the source
171        distribution, and put it in 'self.files'.  This might involve
172        reading the manifest template (and writing the manifest), or just
173        reading the manifest, or just using the default file set -- it all
174        depends on the user's options and the state of the filesystem.
175        """
176        template_exists = os.path.isfile (self.template)
177        if template_exists:
178            template_newer = newer (self.template, self.manifest)
179
180        # Regenerate the manifest if necessary (or if explicitly told to)
181        if ((template_exists and template_newer) or
182            self.force_manifest or
183            self.manifest_only):
184
185            if not template_exists:
186                self.warn (("manifest template '%s' does not exist " +
187                            "(using default file list)") %
188                           self.template)
189
190            # Add default file set to 'files'
191            if self.use_defaults:
192                self.add_defaults ()
193
194            # Read manifest template if it exists
195            if template_exists:
196                self.read_template ()
197
198            # File list now complete -- sort it so that higher-level files
199            # come first
200            sortable_files = map (os.path.split, self.files)
201            sortable_files.sort ()
202            self.files = []
203            for sort_tuple in sortable_files:
204                self.files.append (apply (os.path.join, sort_tuple))
205
206            # Remove duplicates from the file list
207            for i in range (len(self.files)-1, 0, -1):
208                if self.files[i] == self.files[i-1]:
209                    del self.files[i]
210
211            # And write complete file list (including default file set) to
212            # the manifest.
213            self.write_manifest ()
214
215        # Don't regenerate the manifest, just read it in.
216        else:
217            self.read_manifest ()
218
219    # get_file_list ()
220
221
222    def add_defaults (self):
223        """Add all the default files to self.files:
224          - README or README.txt
225          - setup.py
226          - test/test*.py
227          - all pure Python modules mentioned in setup script
228          - all C sources listed as part of extensions or C libraries
229            in the setup script (doesn't catch C headers!)
230        Warns if (README or README.txt) or setup.py are missing; everything
231        else is optional.
232        """
233        standards = [('README', 'README.txt'), 'setup.py']
234        for fn in standards:
235            if type (fn) is TupleType:
236                alts = fn
237                got_it = 0
238                for fn in alts:
239                    if os.path.exists (fn):
240                        got_it = 1
241                        self.files.append (fn)
242                        break
243
244                if not got_it:
245                    self.warn ("standard file not found: should have one of " +
246                               string.join (alts, ', '))
247            else:
248                if os.path.exists (fn):
249                    self.files.append (fn)
250                else:
251                    self.warn ("standard file '%s' not found" % fn)
252
253        optional = ['test/test*.py']
254        for pattern in optional:
255            files = filter (os.path.isfile, glob (pattern))
256            if files:
257                self.files.extend (files)
258
259        if self.distribution.has_pure_modules():
260            build_py = self.get_finalized_command ('build_py')
261            self.files.extend (build_py.get_source_files ())
262
263        if self.distribution.has_ext_modules():
264            build_ext = self.get_finalized_command ('build_ext')
265            self.files.extend (build_ext.get_source_files ())
266
267        if self.distribution.has_c_libraries():
268            build_clib = self.get_finalized_command ('build_clib')
269            self.files.extend (build_clib.get_source_files ())
270
271    # add_defaults ()
272
273
274    def search_dir (self, dir, pattern=None):
275        """Recursively find files under 'dir' matching 'pattern' (a string
276        containing a Unix-style glob pattern).  If 'pattern' is None, find
277        all files under 'dir'.  Return the list of found filenames.
278        """
279        allfiles = findall (dir)
280        if pattern is None:
281            return allfiles
282
283        pattern_re = translate_pattern (pattern)
284        files = []
285        for file in allfiles:
286            if pattern_re.match (os.path.basename (file)):
287                files.append (file)
288
289        return files
290
291    # search_dir ()
292
293
294    def recursive_exclude_pattern (self, dir, pattern=None):
295        """Remove filenames from 'self.files' that are under 'dir' and
296        whose basenames match 'pattern'.
297        """
298        self.debug_print("recursive_exclude_pattern: dir=%s, pattern=%s" %
299                         (dir, pattern))
300        if pattern is None:
301            pattern_re = None
302        else:
303            pattern_re = translate_pattern (pattern)
304
305        for i in range (len (self.files)-1, -1, -1):
306            (cur_dir, cur_base) = os.path.split (self.files[i])
307            if (cur_dir == dir and
308                (pattern_re is None or pattern_re.match (cur_base))):
309                self.debug_print("removing %s" % self.files[i])
310                del self.files[i]
311
312
313    def read_template (self):
314        """Read and parse the manifest template file named by
315        'self.template' (usually "MANIFEST.in").  Process all file
316        specifications (include and exclude) in the manifest template and
317        update 'self.files' accordingly (filenames may be added to
318        or removed from 'self.files' based on the manifest template).
319        """
320        assert self.files is not None and type (self.files) is ListType
321        self.announce("reading manifest template '%s'" % self.template)
322
323        template = TextFile (self.template,
324                             strip_comments=1,
325                             skip_blanks=1,
326                             join_lines=1,
327                             lstrip_ws=1,
328                             rstrip_ws=1,
329                             collapse_ws=1)
330
331        all_files = findall ()
332
333        while 1:
334
335            line = template.readline()
336            if line is None:            # end of file
337                break
338
339            words = string.split (line)
340            action = words[0]
341
342            # First, check that the right number of words are present
343            # for the given action (which is the first word)
344            if action in ('include','exclude',
345                          'global-include','global-exclude'):
346                if len (words) < 2:
347                    template.warn \
348                        ("invalid manifest template line: " +
349                         "'%s' expects <pattern1> <pattern2> ..." %
350                         action)
351                    continue
352
353                pattern_list = map(convert_path, words[1:])
354
355            elif action in ('recursive-include','recursive-exclude'):
356                if len (words) < 3:
357                    template.warn \
358                        ("invalid manifest template line: " +
359                         "'%s' expects <dir> <pattern1> <pattern2> ..." %
360                         action)
361                    continue
362
363                dir = convert_path(words[1])
364                pattern_list = map (convert_path, words[2:])
365
366            elif action in ('graft','prune'):
367                if len (words) != 2:
368                    template.warn \
369                        ("invalid manifest template line: " +
370                         "'%s' expects a single <dir_pattern>" %
371                         action)
372                    continue
373
374                dir_pattern = convert_path (words[1])
375
376            else:
377                template.warn ("invalid manifest template line: " +
378                               "unknown action '%s'" % action)
379                continue
380
381            # OK, now we know that the action is valid and we have the
382            # right number of words on the line for that action -- so we
383            # can proceed with minimal error-checking.  Also, we have
384            # defined either (pattern), (dir and pattern), or
385            # (dir_pattern) -- so we don't have to spend any time
386            # digging stuff up out of 'words'.
387
388            if action == 'include':
389                self.debug_print("include " + string.join(pattern_list))
390                for pattern in pattern_list:
391                    files = self.select_pattern (all_files, pattern, anchor=1)
392                    if not files:
393                        template.warn ("no files found matching '%s'" %
394                                       pattern)
395                    else:
396                        self.files.extend (files)
397
398            elif action == 'exclude':
399                self.debug_print("exclude " + string.join(pattern_list))
400                for pattern in pattern_list:
401                    num = self.exclude_pattern (self.files, pattern, anchor=1)
402                    if num == 0:
403                        template.warn (
404                            "no previously-included files found matching '%s'"%
405                            pattern)
406
407            elif action == 'global-include':
408                self.debug_print("global-include " + string.join(pattern_list))
409                for pattern in pattern_list:
410                    files = self.select_pattern (all_files, pattern, anchor=0)
411                    if not files:
412                        template.warn (("no files found matching '%s' " +
413                                        "anywhere in distribution") %
414                                       pattern)
415                    else:
416                        self.files.extend (files)
417
418            elif action == 'global-exclude':
419                self.debug_print("global-exclude " + string.join(pattern_list))
420                for pattern in pattern_list:
421                    num = self.exclude_pattern (self.files, pattern, anchor=0)
422                    if num == 0:
423                        template.warn \
424                            (("no previously-included files matching '%s' " +
425                              "found anywhere in distribution") %
426                             pattern)
427
428            elif action == 'recursive-include':
429                self.debug_print("recursive-include %s %s" %
430                                 (dir, string.join(pattern_list)))
431                for pattern in pattern_list:
432                    files = self.select_pattern (
433                        all_files, pattern, prefix=dir)
434                    if not files:
435                        template.warn (("no files found matching '%s' " +
436                                        "under directory '%s'") %
437                                       (pattern, dir))
438                    else:
439                        self.files.extend (files)
440
441            elif action == 'recursive-exclude':
442                self.debug_print("recursive-exclude %s %s" %
443                                 (dir, string.join(pattern_list)))
444                for pattern in pattern_list:
445                    num = self.exclude_pattern(
446                        self.files, pattern, prefix=dir)
447                    if num == 0:
448                        template.warn \
449                            (("no previously-included files matching '%s' " +
450                              "found under directory '%s'") %
451                             (pattern, dir))
452
453            elif action == 'graft':
454                self.debug_print("graft " + dir_pattern)
455                files = self.select_pattern(
456                    all_files, None, prefix=dir_pattern)
457                if not files:
458                    template.warn ("no directories found matching '%s'" %
459                                   dir_pattern)
460                else:
461                    self.files.extend (files)
462
463            elif action == 'prune':
464                self.debug_print("prune " + dir_pattern)
465                num = self.exclude_pattern(
466                    self.files, None, prefix=dir_pattern)
467                if num == 0:
468                    template.warn \
469                        (("no previously-included directories found " +
470                          "matching '%s'") %
471                         dir_pattern)
472            else:
473                raise RuntimeError, \
474                      "this cannot happen: invalid action '%s'" % action
475
476        # while loop over lines of template file
477
478        # Prune away the build and source distribution directories
479        build = self.get_finalized_command ('build')
480        self.exclude_pattern (self.files, None, prefix=build.build_base)
481
482        base_dir = self.distribution.get_fullname()
483        self.exclude_pattern (self.files, None, prefix=base_dir)
484
485    # read_template ()
486
487
488    def select_pattern (self, files, pattern, anchor=1, prefix=None):
489        """Select strings (presumably filenames) from 'files' that match
490        'pattern', a Unix-style wildcard (glob) pattern.  Patterns are not
491        quite the same as implemented by the 'fnmatch' module: '*' and '?'
492        match non-special characters, where "special" is platform-dependent:
493        slash on Unix, colon, slash, and backslash on DOS/Windows, and colon on
494        Mac OS.
495
496        If 'anchor' is true (the default), then the pattern match is more
497        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
498        'anchor' is false, both of these will match.
499
500        If 'prefix' is supplied, then only filenames starting with 'prefix'
501        (itself a pattern) and ending with 'pattern', with anything in between
502        them, will match.  'anchor' is ignored in this case.
503
504        Return the list of matching strings, possibly empty.
505        """
506        matches = []
507        pattern_re = translate_pattern (pattern, anchor, prefix)
508        self.debug_print("select_pattern: applying regex r'%s'" %
509                         pattern_re.pattern)
510        for name in files:
511            if pattern_re.search (name):
512                matches.append (name)
513                self.debug_print(" adding " + name)
514
515        return matches
516
517    # select_pattern ()
518
519
520    def exclude_pattern (self, files, pattern, anchor=1, prefix=None):
521        """Remove strings (presumably filenames) from 'files' that match
522        'pattern'.  'pattern', 'anchor', 'and 'prefix' are the same
523        as for 'select_pattern()', above.  The list 'files' is modified
524        in place.
525        """
526        pattern_re = translate_pattern (pattern, anchor, prefix)
527        self.debug_print("exclude_pattern: applying regex r'%s'" %
528                         pattern_re.pattern)
529        for i in range (len(files)-1, -1, -1):
530            if pattern_re.search (files[i]):
531                self.debug_print(" removing " + files[i])
532                del files[i]
533
534    # exclude_pattern ()
535
536
537    def write_manifest (self):
538        """Write the file list in 'self.files' (presumably as filled in by
539        'add_defaults()' and 'read_template()') to the manifest file named
540        by 'self.manifest'.
541        """
542        self.execute(write_file,
543                     (self.manifest, self.files),
544                     "writing manifest file '%s'" % self.manifest)
545
546    # write_manifest ()
547
548
549    def read_manifest (self):
550        """Read the manifest file (named by 'self.manifest') and use it to
551        fill in 'self.files', the list of files to include in the source
552        distribution.
553        """
554        self.announce("reading manifest file '%s'" % self.manifest)
555        manifest = open (self.manifest)
556        while 1:
557            line = manifest.readline ()
558            if line == '':              # end of file
559                break
560            if line[-1] == '\n':
561                line = line[0:-1]
562            self.files.append (line)
563
564    # read_manifest ()
565
566
567    def make_release_tree (self, base_dir, files):
568        """Create the directory tree that will become the source
569        distribution archive.  All directories implied by the filenames in
570        'files' are created under 'base_dir', and then we hard link or copy
571        (if hard linking is unavailable) those files into place.
572        Essentially, this duplicates the developer's source tree, but in a
573        directory named after the distribution, containing only the files
574        to be distributed.
575        """
576        # Create all the directories under 'base_dir' necessary to
577        # put 'files' there.
578        create_tree (base_dir, files,
579                     verbose=self.verbose, dry_run=self.dry_run)
580
581        # And walk over the list of files, either making a hard link (if
582        # os.link exists) to each one that doesn't already exist in its
583        # corresponding location under 'base_dir', or copying each file
584        # that's out-of-date in 'base_dir'.  (Usually, all files will be
585        # out-of-date, because by default we blow away 'base_dir' when
586        # we're done making the distribution archives.)
587
588        if hasattr (os, 'link'):        # can make hard links on this system
589            link = 'hard'
590            msg = "making hard links in %s..." % base_dir
591        else:                           # nope, have to copy
592            link = None
593            msg = "copying files to %s..." % base_dir
594
595        self.announce (msg)
596        for file in files:
597            dest = os.path.join (base_dir, file)
598            self.copy_file (file, dest, link=link)
599
600    # make_release_tree ()
601
602
603    def make_distribution (self):
604        """Create the source distribution(s).  First, we create the release
605        tree with 'make_release_tree()'; then, we create all required
606        archive files (according to 'self.formats') from the release tree.
607        Finally, we clean up by blowing away the release tree (unless
608        'self.keep_tree' is true).  The list of archive files created is
609        stored so it can be retrieved later by 'get_archive_files()'.
610        """
611        # Don't warn about missing meta-data here -- should be (and is!)
612        # done elsewhere.
613        base_dir = self.distribution.get_fullname()
614
615        # Remove any files that match "base_dir" from the fileset -- we
616        # don't want to go distributing the distribution inside itself!
617        self.exclude_pattern (self.files, base_dir + "*")
618
619        self.make_release_tree (base_dir, self.files)
620        archive_files = []              # remember names of files we create
621        for fmt in self.formats:
622            file = self.make_archive (base_dir, fmt, base_dir=base_dir)
623            archive_files.append(file)
624
625        self.archive_files = archive_files
626
627        if not self.keep_tree:
628            remove_tree (base_dir, self.verbose, self.dry_run)
629
630    def get_archive_files (self):
631        """Return the list of archive files created when the command
632        was run, or None if the command hasn't run yet.
633        """
634        return self.archive_files
635
636# class sdist
637
638
639# ----------------------------------------------------------------------
640# Utility functions
641
def findall (dir = os.curdir):
    """Find all regular files under 'dir' and return their names as a
    list.

    Each name is prefixed with the directory it was found in, except that
    files reached from os.curdir carry no leading "./".  Directories are
    descended into but are not themselves part of the result, so callers
    can hand every returned name to a file copy.  Symbolic links to
    directories are neither followed nor listed.
    """
    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir (current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join (current, name)
            else:
                fullname = name

            if os.path.isdir (fullname):
                # Queue real directories for scanning.  They used to be
                # added to the result as well, which put directory names
                # into a list that is documented to hold files.
                if not os.path.islink (fullname):
                    pending.append (fullname)
            elif os.path.isfile (fullname):
                found.append (fullname)

    return found
665
666
def glob_to_re (pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' and '?' do not match "special characters" (which are
    platform-specific; currently only the slash is treated as special).
    """
    pattern_re = fnmatch.translate (pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # lets them match a slash.  So change every non-escaped dot in the RE
    # into "any character except slash".
    # The dot is located with a lookbehind instead of by consuming the
    # character in front of it: the consuming form skipped the second of
    # two adjacent dots (as produced by "??"), leaving a bare '.' that
    # still matched a slash.  Pairs of backslashes in front of the dot are
    # escaped backslashes and do not escape the dot.
    # XXX the "special characters" are just slash -- i.e. this is
    # Unix-only.
    pattern_re = re.sub (r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
    return pattern_re
684
685# glob_to_re ()
686
687
def translate_pattern (pattern, anchor=1, prefix=None):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.

    An empty or None 'pattern' contributes nothing to the expression.
    If 'prefix' (itself a wildcard pattern) is given, the expression
    matches names that start with 'prefix', followed by the path
    separator, anything, and finally 'pattern'; 'anchor' is ignored then.
    Without 'prefix', a true 'anchor' ties the pattern to the start of
    the name.
    """
    if pattern:
        pattern_re = glob_to_re (pattern)
    else:
        pattern_re = ''

    if prefix is not None:
        # Assumes the translated prefix ends in a single '$'.
        prefix_re = (glob_to_re (prefix))[0:-1] # ditch trailing $

        # Join prefix and pattern with the path separator.  This is regex
        # text, not a path: a backslash separator has to be escaped, or it
        # would be read as an escape for the '.' that follows it.
        sep = os.sep
        if sep == '\\':
            sep = r'\\'
        # As with a path join, add no separator after an empty prefix or
        # after one that already ends with it.
        if prefix_re == '' or prefix_re[-len(sep):] == sep:
            sep = ''
        pattern_re = "^" + prefix_re + sep + ".*" + pattern_re
    else:                               # no prefix -- respect anchor flag
        if anchor:
            pattern_re = "^" + pattern_re

    return re.compile (pattern_re)
705
706# translate_pattern ()
707