sdist.py revision d8dfb4c4b8d661acee263e3feb77974ced69e97d
1"""distutils.command.sdist
2
3Implements the Distutils 'sdist' command (create a source distribution)."""
4
5# created 1999/09/22, Greg Ward
6
7__revision__ = "$Id$"
8
9import sys, os, string, re
10import fnmatch
11from types import *
12from glob import glob
13from distutils.core import Command
14from distutils.util import newer, create_tree, remove_tree, convert_path, \
15     write_file
16from distutils.archive_util import check_archive_formats
17from distutils.text_file import TextFile
18from distutils.errors import DistutilsExecError, DistutilsOptionError
19
20
class sdist (Command):
    """Distutils command that creates a source distribution: it works
       out the list of files to ship, records that list in a manifest
       file, and packs the files into one or more archives."""

    # Short summary of what the command does.
    description = "create a source distribution (tarball, zip file, etc.)"

    # Command-line options, one tuple per option: (long name, short name
    # or None, help text).
    user_options = [
        ('template=', 't',
         "name of manifest template file [default: MANIFEST.in]"),
        ('manifest=', 'm',
         "name of manifest file [default: MANIFEST]"),
        ('use-defaults', None,
         "include the default file set in the manifest "
         "[default; disable with --no-defaults]"),
        ('manifest-only', 'o',
         "just regenerate the manifest and then stop"),
        ('force-manifest', 'f',
         "forcibly regenerate the manifest and carry on as usual"),
        ('formats=', None,
         "formats for source distribution (tar, ztar, gztar, bztar, or zip)"),
        ('keep-tree', 'k',
         "keep the distribution tree around after creating " +
         "archive file(s)"),
        ]
    # 'no-defaults' is the switch that turns 'use-defaults' off (see the
    # help text above).
    negative_opts = {'use-defaults': 'no-defaults'}

    # Archive format used when no 'formats' option is given, keyed by
    # 'os.name'; consulted in finalize_options().
    default_format = { 'posix': 'gztar',
                       'nt': 'zip' }

    # Matches optional whitespace, a '!', optional whitespace, then
    # captures a run of non-blank characters.  Nothing else visible in
    # this file refers to it.
    exclude_re = re.compile (r'\s*!\s*(\S+)') # for manifest lines
49
50
51    def initialize_options (self):
52        # 'template' and 'manifest' are, respectively, the names of
53        # the manifest template and manifest file.
54        self.template = None
55        self.manifest = None
56
57        # 'use_defaults': if true, we will include the default file set
58        # in the manifest
59        self.use_defaults = 1
60
61        self.manifest_only = 0
62        self.force_manifest = 0
63
64        self.formats = None
65        self.keep_tree = 0
66
67
68    def finalize_options (self):
69        if self.manifest is None:
70            self.manifest = "MANIFEST"
71        if self.template is None:
72            self.template = "MANIFEST.in"
73
74        if self.formats is None:
75            try:
76                self.formats = [self.default_format[os.name]]
77            except KeyError:
78                raise DistutilsPlatformError, \
79                      "don't know how to create source distributions " + \
80                      "on platform %s" % os.name
81        elif type (self.formats) is StringType:
82            self.formats = string.split (self.formats, ',')
83
84        bad_format = check_archive_formats (self.formats)
85        if bad_format:
86            raise DistutilsOptionError, \
87                  "unknown archive format '%s'" % bad_format
88
89
90    def run (self):
91
92        # 'files' is the list of files that will make up the manifest
93        self.files = []
94
95        # Ensure that all required meta-data is given; warn if not (but
96        # don't die, it's not *that* serious!)
97        self.check_metadata ()
98
99        # Do whatever it takes to get the list of files to process
100        # (process the manifest template, read an existing manifest,
101        # whatever).  File list is put into 'self.files'.
102        self.get_file_list ()
103
104        # If user just wanted us to regenerate the manifest, stop now.
105        if self.manifest_only:
106            return
107
108        # Otherwise, go ahead and create the source distribution tarball,
109        # or zipfile, or whatever.
110        self.make_distribution ()
111
112
113    def check_metadata (self):
114
115        metadata = self.distribution.metadata
116
117        missing = []
118        for attr in ('name', 'version', 'url'):
119            if not (hasattr (metadata, attr) and getattr (metadata, attr)):
120                missing.append (attr)
121
122        if missing:
123            self.warn ("missing required meta-data: " +
124                       string.join (missing, ", "))
125
126        if metadata.author:
127            if not metadata.author_email:
128                self.warn ("missing meta-data: if 'author' supplied, " +
129                           "'author_email' must be supplied too")
130        elif metadata.maintainer:
131            if not metadata.maintainer_email:
132                self.warn ("missing meta-data: if 'maintainer' supplied, " +
133                           "'maintainer_email' must be supplied too")
134        else:
135            self.warn ("missing meta-data: either (author and author_email) " +
136                       "or (maintainer and maintainer_email) " +
137                       "must be supplied")
138
139    # check_metadata ()
140
141
142    def get_file_list (self):
143        """Figure out the list of files to include in the source
144           distribution, and put it in 'self.files'.  This might
145           involve reading the manifest template (and writing the
146           manifest), or just reading the manifest, or just using
147           the default file set -- it all depends on the user's
148           options and the state of the filesystem."""
149
150
151        template_exists = os.path.isfile (self.template)
152        if template_exists:
153            template_newer = newer (self.template, self.manifest)
154
155        # Regenerate the manifest if necessary (or if explicitly told to)
156        if ((template_exists and template_newer) or
157            self.force_manifest or
158            self.manifest_only):
159
160            if not template_exists:
161                self.warn (("manifest template '%s' does not exist " +
162                            "(using default file list)") %
163                           self.template)
164
165            # Add default file set to 'files'
166            if self.use_defaults:
167                self.find_defaults ()
168
169            # Read manifest template if it exists
170            if template_exists:
171                self.read_template ()
172
173            # File list now complete -- sort it so that higher-level files
174            # come first
175            sortable_files = map (os.path.split, self.files)
176            sortable_files.sort ()
177            self.files = []
178            for sort_tuple in sortable_files:
179                self.files.append (apply (os.path.join, sort_tuple))
180
181            # Remove duplicates from the file list
182            for i in range (len(self.files)-1, 0, -1):
183                if self.files[i] == self.files[i-1]:
184                    del self.files[i]
185
186            # And write complete file list (including default file set) to
187            # the manifest.
188            self.write_manifest ()
189
190        # Don't regenerate the manifest, just read it in.
191        else:
192            self.read_manifest ()
193
194    # get_file_list ()
195
196
197    def find_defaults (self):
198
199        standards = [('README', 'README.txt'), 'setup.py']
200        for fn in standards:
201            if type (fn) is TupleType:
202                alts = fn
203                got_it = 0
204                for fn in alts:
205                    if os.path.exists (fn):
206                        got_it = 1
207                        self.files.append (fn)
208                        break
209
210                if not got_it:
211                    self.warn ("standard file not found: should have one of " +
212                               string.join (alts, ', '))
213            else:
214                if os.path.exists (fn):
215                    self.files.append (fn)
216                else:
217                    self.warn ("standard file '%s' not found" % fn)
218
219        optional = ['test/test*.py']
220        for pattern in optional:
221            files = filter (os.path.isfile, glob (pattern))
222            if files:
223                self.files.extend (files)
224
225        if self.distribution.has_pure_modules():
226            build_py = self.get_finalized_command ('build_py')
227            self.files.extend (build_py.get_source_files ())
228
229        if self.distribution.has_ext_modules():
230            build_ext = self.get_finalized_command ('build_ext')
231            self.files.extend (build_ext.get_source_files ())
232
233        if self.distribution.has_c_libraries():
234            build_clib = self.get_finalized_command ('build_clib')
235            self.files.extend (build_clib.get_source_files ())
236
237
238    def search_dir (self, dir, pattern=None):
239        """Recursively find files under 'dir' matching 'pattern' (a string
240           containing a Unix-style glob pattern).  If 'pattern' is None,
241           find all files under 'dir'.  Return the list of found
242           filenames."""
243
244        allfiles = findall (dir)
245        if pattern is None:
246            return allfiles
247
248        pattern_re = translate_pattern (pattern)
249        files = []
250        for file in allfiles:
251            if pattern_re.match (os.path.basename (file)):
252                files.append (file)
253
254        return files
255
256    # search_dir ()
257
258
259    def exclude_pattern (self, pattern):
260        """Remove filenames from 'self.files' that match 'pattern'."""
261        print "exclude_pattern: pattern=%s" % pattern
262        pattern_re = translate_pattern (pattern)
263        for i in range (len (self.files)-1, -1, -1):
264            if pattern_re.match (self.files[i]):
265                print "removing %s" % self.files[i]
266                del self.files[i]
267
268
269    def recursive_exclude_pattern (self, dir, pattern=None):
270        """Remove filenames from 'self.files' that are under 'dir'
271           and whose basenames match 'pattern'."""
272
273        print "recursive_exclude_pattern: dir=%s, pattern=%s" % (dir, pattern)
274        if pattern is None:
275            pattern_re = None
276        else:
277            pattern_re = translate_pattern (pattern)
278
279        for i in range (len (self.files)-1, -1, -1):
280            (cur_dir, cur_base) = os.path.split (self.files[i])
281            if (cur_dir == dir and
282                (pattern_re is None or pattern_re.match (cur_base))):
283                print "removing %s" % self.files[i]
284                del self.files[i]
285
286
    def read_template (self):
        """Read and parse the manifest template file named by
           'self.template' (usually "MANIFEST.in").  Process all file
           specifications (include and exclude) in the manifest template
           and add the resulting filenames to 'self.files'.

           Each template line is an action word followed by its
           arguments.  Including actions ('include', 'global-include',
           'recursive-include', 'graft') select names from everything
           findall() reports under the current directory; excluding
           actions ('exclude', 'global-exclude', 'recursive-exclude',
           'prune') remove names already in 'self.files'.  Malformed
           lines are reported with template.warn() and skipped.  At the
           end, anything under the build base directory or under the
           distribution's full-name directory is removed from
           'self.files'."""

        assert self.files is not None and type (self.files) is ListType

        # NOTE(review): going by the option names, TextFile is asked to
        # drop comments and blank lines, join continued lines and tidy
        # up whitespace -- confirm against distutils.text_file.
        template = TextFile (self.template,
                             strip_comments=1,
                             skip_blanks=1,
                             join_lines=1,
                             lstrip_ws=1,
                             rstrip_ws=1,
                             collapse_ws=1)

        # The pool of names the including actions select from.
        all_files = findall ()

        while 1:

            line = template.readline()
            if line is None:            # end of file
                break

            words = string.split (line)
            action = words[0]

            # First, check that the right number of words are present
            # for the given action (which is the first word)
            if action in ('include','exclude',
                          'global-include','global-exclude'):
                if len (words) < 2:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects <pattern1> <pattern2> ..." %
                         action)
                    continue

                pattern_list = map(convert_path, words[1:])

            elif action in ('recursive-include','recursive-exclude'):
                if len (words) < 3:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects <dir> <pattern1> <pattern2> ..." %
                         action)
                    continue

                dir = convert_path(words[1])
                pattern_list = map (convert_path, words[2:])

            elif action in ('graft','prune'):
                if len (words) != 2:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects a single <dir_pattern>" %
                         action)
                    continue

                dir_pattern = convert_path (words[1])

            else:
                template.warn ("invalid manifest template line: " +
                               "unknown action '%s'" % action)
                continue

            # OK, now we know that the action is valid and we have the
            # right number of words on the line for that action -- so we
            # can proceed with minimal error-checking.  Also, we have
            # defined either (pattern), (dir and pattern), or
            # (dir_pattern) -- so we don't have to spend any time
            # digging stuff up out of 'words'.
            #
            # In the excluding branches below, 'num' is whatever the
            # module-level exclude_pattern() returns; the warning is
            # issued only when that value equals 0.

            # 'include': anchored match of each pattern against the pool
            if action == 'include':
                print "include", string.join(pattern_list)
                for pattern in pattern_list:
                    files = select_pattern (all_files, pattern, anchor=1)
                    if not files:
                        template.warn ("no files found matching '%s'" % pattern)
                    else:
                        self.files.extend (files)

            # 'exclude': anchored match against the files chosen so far
            elif action == 'exclude':
                print "exclude", string.join(pattern_list)
                for pattern in pattern_list:
                    num = exclude_pattern (self.files, pattern, anchor=1)
                    if num == 0:
                        template.warn (
                            "no previously-included files found matching '%s'"%
                            pattern)

            # 'global-include': like 'include', but unanchored
            elif action == 'global-include':
                print "global-include", string.join(pattern_list)
                for pattern in pattern_list:
                    files = select_pattern (all_files, pattern, anchor=0)
                    if not files:
                        template.warn (("no files found matching '%s' " +
                                        "anywhere in distribution") %
                                       pattern)
                    else:
                        self.files.extend (files)

            # 'global-exclude': like 'exclude', but unanchored
            elif action == 'global-exclude':
                print "global-exclude", string.join(pattern_list)
                for pattern in pattern_list:
                    num = exclude_pattern (self.files, pattern, anchor=0)
                    if num == 0:
                        template.warn \
                            (("no previously-included files matching '%s' " +
                              "found anywhere in distribution") %
                             pattern)

            # 'recursive-include': each pattern is matched with 'dir' as
            # the prefix
            elif action == 'recursive-include':
                print "recursive-include", dir, string.join(pattern_list)
                for pattern in pattern_list:
                    files = select_pattern (all_files, pattern, prefix=dir)
                    if not files:
                        template.warn (("no files found matching '%s' " +
                                        "under directory '%s'") %
                                       (pattern, dir))
                    else:
                        self.files.extend (files)

            # 'recursive-exclude': the removing counterpart of
            # 'recursive-include'
            elif action == 'recursive-exclude':
                print "recursive-exclude", dir, string.join(pattern_list)
                for pattern in pattern_list:
                    num = exclude_pattern (self.files, pattern, prefix=dir)
                    if num == 0:
                        template.warn \
                            (("no previously-included files matching '%s' " +
                              "found under directory '%s'") %
                             (pattern, dir))

            # 'graft': no filename pattern, only the directory prefix
            elif action == 'graft':
                print "graft", dir_pattern
                files = select_pattern (all_files, None, prefix=dir_pattern)
                if not files:
                    template.warn ("no directories found matching '%s'" %
                                   dir_pattern)
                else:
                    self.files.extend (files)

            # 'prune': the removing counterpart of 'graft'
            elif action == 'prune':
                print "prune", dir_pattern
                num = exclude_pattern (self.files, None, prefix=dir_pattern)
                if num == 0:
                    template.warn \
                        (("no previously-included directories found " +
                          "matching '%s'") %
                         dir_pattern)
            else:
                # unknown actions were already rejected above
                raise RuntimeError, \
                      "this cannot happen: invalid action '%s'" % action

        # while loop over lines of template file

        # Prune away the build and source distribution directories
        build = self.get_finalized_command ('build')
        exclude_pattern (self.files, None, prefix=build.build_base)

        base_dir = self.distribution.get_fullname()
        exclude_pattern (self.files, None, prefix=base_dir)
449
450    # read_template ()
451
452
453    def write_manifest (self):
454        """Write the file list in 'self.files' (presumably as filled in
455           by 'find_defaults()' and 'read_template()') to the manifest file
456           named by 'self.manifest'."""
457
458        self.execute(write_file,
459                     (self.manifest, self.files),
460                     "writing manifest file")
461
462    # write_manifest ()
463
464
465    def read_manifest (self):
466        """Read the manifest file (named by 'self.manifest') and use
467           it to fill in 'self.files', the list of files to include
468           in the source distribution."""
469
470        manifest = open (self.manifest)
471        while 1:
472            line = manifest.readline ()
473            if line == '':              # end of file
474                break
475            if line[-1] == '\n':
476                line = line[0:-1]
477            self.files.append (line)
478
479    # read_manifest ()
480
481
482
483    def make_release_tree (self, base_dir, files):
484
485        # Create all the directories under 'base_dir' necessary to
486        # put 'files' there.
487        create_tree (base_dir, files,
488                     verbose=self.verbose, dry_run=self.dry_run)
489
490        # And walk over the list of files, either making a hard link (if
491        # os.link exists) to each one that doesn't already exist in its
492        # corresponding location under 'base_dir', or copying each file
493        # that's out-of-date in 'base_dir'.  (Usually, all files will be
494        # out-of-date, because by default we blow away 'base_dir' when
495        # we're done making the distribution archives.)
496
497        if hasattr (os, 'link'):        # can make hard links on this system
498            link = 'hard'
499            msg = "making hard links in %s..." % base_dir
500        else:                           # nope, have to copy
501            link = None
502            msg = "copying files to %s..." % base_dir
503
504        self.announce (msg)
505        for file in files:
506            dest = os.path.join (base_dir, file)
507            self.copy_file (file, dest, link=link)
508
509    # make_release_tree ()
510
511
512    def make_distribution (self):
513
514        # Don't warn about missing meta-data here -- should be (and is!)
515        # done elsewhere.
516        base_dir = self.distribution.get_fullname()
517
518        # Remove any files that match "base_dir" from the fileset -- we
519        # don't want to go distributing the distribution inside itself!
520        self.exclude_pattern (base_dir + "*")
521
522        self.make_release_tree (base_dir, self.files)
523        for fmt in self.formats:
524            self.make_archive (base_dir, fmt, base_dir=base_dir)
525
526        if not self.keep_tree:
527            remove_tree (base_dir, self.verbose, self.dry_run)
528
529# class sdist
530
531
532# ----------------------------------------------------------------------
533# Utility functions
534
def findall (dir = os.curdir):
    """Find all regular files under 'dir' and return the list of their
       names.  Each name is prefixed with the directory it was found
       in, except that names directly in 'os.curdir' carry no prefix.

       Directories are descended into (unless they are symbolic links)
       but are not themselves part of the result, so every returned
       name can be copied or archived as a file."""

    found = []
    pending = [dir]                     # directories still to be listed

    while pending:
        current = pending.pop ()

        for name in os.listdir (current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join (current, name)
            else:
                fullname = name

            if os.path.isfile (fullname):
                found.append (fullname)
            elif os.path.isdir (fullname) and not os.path.islink (fullname):
                # symlinked directories are skipped rather than followed
                pending.append (fullname)

    return found
558
559
560def select_pattern (files, pattern, anchor=1, prefix=None):
561    """Select strings (presumably filenames) from 'files' that match
562       'pattern', a Unix-style wildcard (glob) pattern.  Patterns are not
563       quite the same as implemented by the 'fnmatch' module: '*' and '?'
564       match non-special characters, where "special" is platform-dependent:
565       slash on Unix, colon, slash, and backslash on DOS/Windows, and colon
566       on Mac OS.
567
568       If 'anchor' is true (the default), then the pattern match is more
569       stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
570       'anchor' is false, both of these will match.
571
572       If 'prefix' is supplied, then only filenames starting with 'prefix'
573       (itself a pattern) and ending with 'pattern', with anything in
574       between them, will match.  'anchor' is ignored in this case.
575
576       Return the list of matching strings, possibly empty."""
577
578    matches = []
579    pattern_re = translate_pattern (pattern, anchor, prefix)
580    print "select_pattern: applying re %s" % pattern_re.pattern
581    for name in files:
582        if pattern_re.search (name):
583            matches.append (name)
584            print " adding", name
585
586    return matches
587
588# select_pattern ()
589
590
591def exclude_pattern (files, pattern, anchor=1, prefix=None):
592
593    pattern_re = translate_pattern (pattern, anchor, prefix)
594    print "exclude_pattern: applying re %s" % pattern_re.pattern
595    for i in range (len(files)-1, -1, -1):
596        if pattern_re.search (files[i]):
597            print " removing", files[i]
598            del files[i]
599
600# exclude_pattern ()
601
602
def glob_to_re (pattern):
    """Translate a shell-like glob pattern to a regular expression;
       return a string containing the regex.  Differs from
       'fnmatch.translate()' in that '?' and '*' do not match slash:
       every unescaped '.' in the translated regex is replaced by
       '[^/]'."""
    pattern_re = fnmatch.translate (pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # would let them match slash.  Go through the RE one token at a time:
    # a backslash escape (backslash plus the character after it) is
    # copied through untouched, while a bare dot is replaced.  Because no
    # neighbouring character is consumed along with the dot, runs of
    # wildcards such as '??' are converted dot by dot.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    pattern_re = re.sub (r'(\\.)|\.',
                         lambda m: m.group (1) or '[^/]',
                         pattern_re)
    return pattern_re
619
620# glob_to_re ()
621
622
def translate_pattern (pattern, anchor=1, prefix=None):
    """Build and return a compiled regular expression from the
       shell-like wildcard 'pattern'.

       An empty or None 'pattern' contributes an empty regex.  Without
       'prefix', the regex is tied to the start of the string only when
       'anchor' is true.  With 'prefix' (itself a wildcard pattern), the
       regex always starts at the beginning of the string and reads:
       prefix, then anything, then 'pattern'; 'anchor' plays no part."""

    tail_re = ''
    if pattern:
        tail_re = glob_to_re (pattern)

    if prefix is None:
        # no prefix -- respect anchor flag
        if anchor:
            tail_re = "^" + tail_re
        return re.compile (tail_re)

    # The last character of the translated prefix is dropped (the
    # trailing '$') so that more regex text can follow it.
    # NOTE(review): os.path.join() puts the platform's path separator
    # into regex text; where that separator is a backslash it would read
    # as a regex escape -- confirm before relying on this off Unix.
    head_re = (glob_to_re (prefix))[0:-1]
    return re.compile ("^" + os.path.join (head_re, ".*" + tail_re))
640
641# translate_pattern ()
642