sdist.py revision 0ae7f76b40a6700491aa739070f2a830dbbb0409
1"""distutils.command.sdist
2
3Implements the Distutils 'sdist' command (create a source distribution)."""
4
5# created 1999/09/22, Greg Ward
6
7__revision__ = "$Id$"
8
import sys, os, string, re
import fnmatch
from types import *
from glob import glob
from distutils.core import Command
from distutils.util import \
     newer, create_tree, remove_tree, make_tarball, make_zipfile, native_path
from distutils.text_file import TextFile
from distutils.errors import DistutilsExecError, DistutilsPlatformError
18
19
20class sdist (Command):
21
22    description = "create a source distribution (tarball, zip file, etc.)"
23
24    user_options = [
25        ('template=', 't',
26         "name of manifest template file [default: MANIFEST.in]"),
27        ('manifest=', 'm',
28         "name of manifest file [default: MANIFEST]"),
29        ('use-defaults', None,
30         "include the default file set in the manifest "
31         "[default; disable with --no-defaults]"),
32        ('manifest-only', None,
33         "just regenerate the manifest and then stop"),
34        ('force-manifest', None,
35         "forcibly regenerate the manifest and carry on as usual"),
36
37        ('formats=', None,
38         "formats for source distribution (tar, ztar, gztar, or zip)"),
39        ('keep-tree', 'k',
40         "keep the distribution tree around after creating " +
41         "archive file(s)"),
42        ]
43    negative_opts = {'use-defaults': 'no-defaults'}
44
45    default_format = { 'posix': 'gztar',
46                       'nt': 'zip' }
47
48    exclude_re = re.compile (r'\s*!\s*(\S+)') # for manifest lines
49
50
51    def initialize_options (self):
52        # 'template' and 'manifest' are, respectively, the names of
53        # the manifest template and manifest file.
54        self.template = None
55        self.manifest = None
56
57        # 'use_defaults': if true, we will include the default file set
58        # in the manifest
59        self.use_defaults = 1
60
61        self.manifest_only = 0
62        self.force_manifest = 0
63
64        self.formats = None
65        self.keep_tree = 0
66
67
68    def finalize_options (self):
69        if self.manifest is None:
70            self.manifest = "MANIFEST"
71        if self.template is None:
72            self.template = "MANIFEST.in"
73
74        if self.formats is None:
75            try:
76                self.formats = [self.default_format[os.name]]
77            except KeyError:
78                raise DistutilsPlatformError, \
79                      "don't know how to create source distributions " + \
80                      "on platform %s" % os.name
81        elif type (self.formats) is StringType:
82            self.formats = string.split (self.formats, ',')
83
84
85    def run (self):
86
87        # 'files' is the list of files that will make up the manifest
88        self.files = []
89
90        # Ensure that all required meta-data is given; warn if not (but
91        # don't die, it's not *that* serious!)
92        self.check_metadata ()
93
94        # Do whatever it takes to get the list of files to process
95        # (process the manifest template, read an existing manifest,
96        # whatever).  File list is put into 'self.files'.
97        self.get_file_list ()
98
99        # If user just wanted us to regenerate the manifest, stop now.
100        if self.manifest_only:
101            return
102
103        # Otherwise, go ahead and create the source distribution tarball,
104        # or zipfile, or whatever.
105        self.make_distribution ()
106
107
108    def check_metadata (self):
109
110        metadata = self.distribution.metadata
111
112        missing = []
113        for attr in ('name', 'version', 'url'):
114            if not (hasattr (metadata, attr) and getattr (metadata, attr)):
115                missing.append (attr)
116
117        if missing:
118            self.warn ("missing required meta-data: " +
119                       string.join (missing, ", "))
120
121        if metadata.author:
122            if not metadata.author_email:
123                self.warn ("missing meta-data: if 'author' supplied, " +
124                           "'author_email' must be supplied too")
125        elif metadata.maintainer:
126            if not metadata.maintainer_email:
127                self.warn ("missing meta-data: if 'maintainer' supplied, " +
128                           "'maintainer_email' must be supplied too")
129        else:
130            self.warn ("missing meta-data: either (author and author_email) " +
131                       "or (maintainer and maintainer_email) " +
132                       "must be supplied")
133
134    # check_metadata ()
135
136
137    def get_file_list (self):
138        """Figure out the list of files to include in the source
139           distribution, and put it in 'self.files'.  This might
140           involve reading the manifest template (and writing the
141           manifest), or just reading the manifest, or just using
142           the default file set -- it all depends on the user's
143           options and the state of the filesystem."""
144
145
146        template_exists = os.path.isfile (self.template)
147        if template_exists:
148            template_newer = newer (self.template, self.manifest)
149
150        # Regenerate the manifest if necessary (or if explicitly told to)
151        if ((template_exists and template_newer) or
152            self.force_manifest or
153            self.manifest_only):
154
155            if not template_exists:
156                self.warn (("manifest template '%s' does not exist " +
157                            "(using default file list)") %
158                           self.template)
159
160            # Add default file set to 'files'
161            if self.use_defaults:
162                self.find_defaults ()
163
164            # Read manifest template if it exists
165            if template_exists:
166                self.read_template ()
167
168            # File list now complete -- sort it so that higher-level files
169            # come first
170            sortable_files = map (os.path.split, self.files)
171            sortable_files.sort ()
172            self.files = []
173            for sort_tuple in sortable_files:
174                self.files.append (apply (os.path.join, sort_tuple))
175
176            # Remove duplicates from the file list
177            for i in range (len(self.files)-1, 0, -1):
178                if self.files[i] == self.files[i-1]:
179                    del self.files[i]
180
181            # And write complete file list (including default file set) to
182            # the manifest.
183            self.write_manifest ()
184
185        # Don't regenerate the manifest, just read it in.
186        else:
187            self.read_manifest ()
188
189    # get_file_list ()
190
191
192    def find_defaults (self):
193
194        standards = [('README', 'README.txt'), 'setup.py']
195        for fn in standards:
196            if type (fn) is TupleType:
197                alts = fn
198                got_it = 0
199                for fn in alts:
200                    if os.path.exists (fn):
201                        got_it = 1
202                        self.files.append (fn)
203                        break
204
205                if not got_it:
206                    self.warn ("standard file not found: should have one of " +
207                               string.join (alts, ', '))
208            else:
209                if os.path.exists (fn):
210                    self.files.append (fn)
211                else:
212                    self.warn ("standard file '%s' not found" % fn)
213
214        optional = ['test/test*.py']
215        for pattern in optional:
216            files = filter (os.path.isfile, glob (pattern))
217            if files:
218                self.files.extend (files)
219
220        if self.distribution.has_pure_modules():
221            build_py = self.find_peer ('build_py')
222            self.files.extend (build_py.get_source_files ())
223
224        if self.distribution.has_ext_modules():
225            build_ext = self.find_peer ('build_ext')
226            self.files.extend (build_ext.get_source_files ())
227
228        if self.distribution.has_c_libraries():
229            build_clib = self.find_peer ('build_clib')
230            self.files.extend (build_clib.get_source_files ())
231
232
233    def search_dir (self, dir, pattern=None):
234        """Recursively find files under 'dir' matching 'pattern' (a string
235           containing a Unix-style glob pattern).  If 'pattern' is None,
236           find all files under 'dir'.  Return the list of found
237           filenames."""
238
239        allfiles = findall (dir)
240        if pattern is None:
241            return allfiles
242
243        pattern_re = translate_pattern (pattern)
244        files = []
245        for file in allfiles:
246            if pattern_re.match (os.path.basename (file)):
247                files.append (file)
248
249        return files
250
251    # search_dir ()
252
253
254    def exclude_pattern (self, pattern):
255        """Remove filenames from 'self.files' that match 'pattern'."""
256        print "exclude_pattern: pattern=%s" % pattern
257        pattern_re = translate_pattern (pattern)
258        for i in range (len (self.files)-1, -1, -1):
259            if pattern_re.match (self.files[i]):
260                print "removing %s" % self.files[i]
261                del self.files[i]
262
263
264    def recursive_exclude_pattern (self, dir, pattern=None):
265        """Remove filenames from 'self.files' that are under 'dir'
266           and whose basenames match 'pattern'."""
267
268        print "recursive_exclude_pattern: dir=%s, pattern=%s" % (dir, pattern)
269        if pattern is None:
270            pattern_re = None
271        else:
272            pattern_re = translate_pattern (pattern)
273
274        for i in range (len (self.files)-1, -1, -1):
275            (cur_dir, cur_base) = os.path.split (self.files[i])
276            if (cur_dir == dir and
277                (pattern_re is None or pattern_re.match (cur_base))):
278                print "removing %s" % self.files[i]
279                del self.files[i]
280
281
282    def read_template (self):
283        """Read and parse the manifest template file named by
284           'self.template' (usually "MANIFEST.in").  Process all file
285           specifications (include and exclude) in the manifest template
286           and add the resulting filenames to 'self.files'."""
287
288        assert self.files is not None and type (self.files) is ListType
289
290        template = TextFile (self.template,
291                             strip_comments=1,
292                             skip_blanks=1,
293                             join_lines=1,
294                             lstrip_ws=1,
295                             rstrip_ws=1,
296                             collapse_ws=1)
297
298        all_files = findall ()
299
300        while 1:
301
302            line = template.readline()
303            if line is None:            # end of file
304                break
305
306            words = string.split (line)
307            action = words[0]
308
309            # First, check that the right number of words are present
310            # for the given action (which is the first word)
311            if action in ('include','exclude',
312                          'global-include','global-exclude'):
313                if len (words) < 2:
314                    template.warn \
315                        ("invalid manifest template line: " +
316                         "'%s' expects <pattern1> <pattern2> ..." %
317                         action)
318                    continue
319
320                pattern_list = map(native_path, words[1:])
321
322            elif action in ('recursive-include','recursive-exclude'):
323                if len (words) < 3:
324                    template.warn \
325                        ("invalid manifest template line: " +
326                         "'%s' expects <dir> <pattern1> <pattern2> ..." %
327                         action)
328                    continue
329
330                dir = native_path(words[1])
331                pattern_list = map (native_path, words[2:])
332
333            elif action in ('graft','prune'):
334                if len (words) != 2:
335                    template.warn \
336                        ("invalid manifest template line: " +
337                         "'%s' expects a single <dir_pattern>" %
338                         action)
339                    continue
340
341                dir_pattern = native_path (words[1])
342
343            else:
344                template.warn ("invalid manifest template line: " +
345                               "unknown action '%s'" % action)
346                continue
347
348            # OK, now we know that the action is valid and we have the
349            # right number of words on the line for that action -- so we
350            # can proceed with minimal error-checking.  Also, we have
351            # defined either (pattern), (dir and pattern), or
352            # (dir_pattern) -- so we don't have to spend any time
353            # digging stuff up out of 'words'.
354
355            if action == 'include':
356                print "include", string.join(pattern_list)
357                for pattern in pattern_list:
358                    files = select_pattern (all_files, pattern, anchor=1)
359                    if not files:
360                        template.warn ("no files found matching '%s'" % pattern)
361                    else:
362                        self.files.extend (files)
363
364            elif action == 'exclude':
365                print "exclude", string.join(pattern_list)
366                for pattern in pattern_list:
367                    num = exclude_pattern (self.files, pattern, anchor=1)
368                    if num == 0:
369                        template.warn (
370                            "no previously-included files found matching '%s'"%
371                            pattern)
372
373            elif action == 'global-include':
374                print "global-include", string.join(pattern_list)
375                for pattern in pattern_list:
376                    files = select_pattern (all_files, pattern, anchor=0)
377                    if not files:
378                        template.warn (("no files found matching '%s' " +
379                                        "anywhere in distribution") %
380                                       pattern)
381                    else:
382                        self.files.extend (files)
383
384            elif action == 'global-exclude':
385                print "global-exclude", string.join(pattern_list)
386                for pattern in pattern_list:
387                    num = exclude_pattern (self.files, pattern, anchor=0)
388                    if num == 0:
389                        template.warn \
390                            (("no previously-included files matching '%s' " +
391                              "found anywhere in distribution") %
392                             pattern)
393
394            elif action == 'recursive-include':
395                print "recursive-include", dir, string.join(pattern_list)
396                for pattern in pattern_list:
397                    files = select_pattern (all_files, pattern, prefix=dir)
398                    if not files:
399                        template.warn (("no files found matching '%s' " +
400                                        "under directory '%s'") %
401                                       (pattern, dir))
402                    else:
403                        self.files.extend (files)
404
405            elif action == 'recursive-exclude':
406                print "recursive-exclude", dir, string.join(pattern_list)
407                for pattern in pattern_list:
408                    num = exclude_pattern (self.files, pattern, prefix=dir)
409                    if num == 0:
410                        template.warn \
411                            (("no previously-included files matching '%s' " +
412                              "found under directory '%s'") %
413                             (pattern, dir))
414
415            elif action == 'graft':
416                print "graft", dir_pattern
417                files = select_pattern (all_files, None, prefix=dir_pattern)
418                if not files:
419                    template.warn ("no directories found matching '%s'" %
420                                   dir_pattern)
421                else:
422                    self.files.extend (files)
423
424            elif action == 'prune':
425                print "prune", dir_pattern
426                num = exclude_pattern (self.files, None, prefix=dir_pattern)
427                if num == 0:
428                    template.warn \
429                        (("no previously-included directories found " +
430                          "matching '%s'") %
431                         dir_pattern)
432            else:
433                raise RuntimeError, \
434                      "this cannot happen: invalid action '%s'" % action
435
436        # while loop over lines of template file
437
438    # read_template ()
439
440
441    def write_manifest (self):
442        """Write the file list in 'self.files' (presumably as filled in
443           by 'find_defaults()' and 'read_template()') to the manifest file
444           named by 'self.manifest'."""
445
446        manifest = open (self.manifest, "w")
447        for fn in self.files:
448            manifest.write (fn + '\n')
449        manifest.close ()
450
451    # write_manifest ()
452
453
454    def read_manifest (self):
455        """Read the manifest file (named by 'self.manifest') and use
456           it to fill in 'self.files', the list of files to include
457           in the source distribution."""
458
459        manifest = open (self.manifest)
460        while 1:
461            line = manifest.readline ()
462            if line == '':              # end of file
463                break
464            if line[-1] == '\n':
465                line = line[0:-1]
466            self.files.append (line)
467
468    # read_manifest ()
469
470
471
472    def make_release_tree (self, base_dir, files):
473
474        # Create all the directories under 'base_dir' necessary to
475        # put 'files' there.
476        create_tree (base_dir, files,
477                     verbose=self.verbose, dry_run=self.dry_run)
478
479        # And walk over the list of files, either making a hard link (if
480        # os.link exists) to each one that doesn't already exist in its
481        # corresponding location under 'base_dir', or copying each file
482        # that's out-of-date in 'base_dir'.  (Usually, all files will be
483        # out-of-date, because by default we blow away 'base_dir' when
484        # we're done making the distribution archives.)
485
486        if hasattr (os, 'link'):        # can make hard links on this system
487            link = 'hard'
488            msg = "making hard links in %s..." % base_dir
489        else:                           # nope, have to copy
490            link = None
491            msg = "copying files to %s..." % base_dir
492
493        self.announce (msg)
494        for file in files:
495            dest = os.path.join (base_dir, file)
496            self.copy_file (file, dest, link=link)
497
498    # make_release_tree ()
499
500
501    def make_distribution (self):
502
503        # Don't warn about missing meta-data here -- should be (and is!)
504        # done elsewhere.
505        base_dir = self.distribution.get_fullname()
506
507        # Remove any files that match "base_dir" from the fileset -- we
508        # don't want to go distributing the distribution inside itself!
509        self.exclude_pattern (base_dir + "*")
510
511        self.make_release_tree (base_dir, self.files)
512        for fmt in self.formats:
513            self.make_archive (base_dir, fmt, base_dir=base_dir)
514
515        if not self.keep_tree:
516            remove_tree (base_dir, self.verbose, self.dry_run)
517
# class sdist
519
520
521# ----------------------------------------------------------------------
522# Utility functions
523
def findall (dir = os.curdir):
    """Find all regular files under 'dir', searching it recursively,
       and return the list of their names.  Each name has 'dir' joined
       onto the front, except when 'dir' is 'os.curdir', in which case
       names are left relative to the current directory (no leading
       "./").

       Directories are descended into but never listed themselves, and
       directories reached through a symbolic link are not followed.
       Anything that is not a regular file (or a link to one) is left
       out, so every name returned can be copied or linked as a file."""

    found = []
    pending = [dir]                     # directories still to be scanned

    while pending:
        current = pending.pop ()

        for name in os.listdir (current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join (current, name)
            else:
                fullname = name

            if os.path.isfile (fullname):
                found.append (fullname)
            elif os.path.isdir (fullname) and not os.path.islink (fullname):
                pending.append (fullname)

    return found
547
548
def select_pattern (files, pattern, anchor=1, prefix=None):
    """Return the list of those strings (presumably filenames) in
       'files' that match 'pattern', a Unix-style wildcard (glob)
       pattern.  The list is empty if nothing matches; 'files' itself is
       left untouched.

       The wildcards differ from those of the 'fnmatch' module in that
       '*' and '?' are not meant to match "special" characters -- slash
       on Unix; colon, slash and backslash on DOS/Windows; colon on
       Mac OS.

       With 'anchor' true (the default) the match is tied to the start
       of the name: "*.py" selects "foo.py" but not "foo/bar.py".  With
       'anchor' false, both are selected.

       If 'prefix' (itself a pattern) is supplied, a name is selected
       only if it starts with 'prefix' and ends with 'pattern', with
       anything at all in between; 'anchor' is ignored in that case.

       The regular expression used and every name selected are reported
       on stdout."""

    pattern_re = translate_pattern (pattern, anchor, prefix)
    print ("select_pattern: applying re %s" % pattern_re.pattern)

    selected = []
    for candidate in files:
        if not pattern_re.search (candidate):
            continue
        selected.append (candidate)
        print (" adding %s" % (candidate,))

    return selected

# select_pattern ()
578
579
def exclude_pattern (files, pattern, anchor=1, prefix=None):
    """Remove, in place, every string in the list 'files' that matches
       'pattern'.  'pattern', 'anchor' and 'prefix' are passed on to
       'translate_pattern()' and so mean the same as they do there.

       Return the number of entries removed (0 if nothing matched), so
       that callers can warn about patterns that had no effect.  The
       regular expression used and every name removed are reported on
       stdout."""

    pattern_re = translate_pattern (pattern, anchor, prefix)
    print ("exclude_pattern: applying re %s" % pattern_re.pattern)

    removed = 0
    # Go backwards so deletions don't disturb the entries still to come
    for i in range (len (files)-1, -1, -1):
        if pattern_re.search (files[i]):
            print (" removing %s" % (files[i],))
            del files[i]
            removed = removed + 1

    return removed

# exclude_pattern ()
590
591
def glob_to_re (pattern):
    """Translate a shell-like glob pattern to a regular expression;
       return a string containing the regex.  Differs from
       'fnmatch.translate()' in that '*' and '?' do not match "special
       characters" (which are platform-specific)."""

    pattern_re = fnmatch.translate (pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters.
    #
    # A dot is non-escaped when it follows an even number (possibly
    # zero) of backslashes.  The lookbehind finds the start of that run
    # of backslashes without consuming the character in front of it, so
    # adjacent wildcards ("??") are all converted, as is a wildcard that
    # follows an escaped backslash.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    pattern_re = re.sub (r'(?<!\\)((?:\\\\)*)\.', r'\1[^/]', pattern_re)
    return pattern_re

# glob_to_re ()
610
611
def translate_pattern (pattern, anchor=1, prefix=None):
    """Build and return the compiled regular expression for the
       shell-like wildcard 'pattern'.

       An empty or None 'pattern' contributes nothing to the regex.
       Without 'prefix', the regex is tied to the start of the string
       if 'anchor' is true and left as-is otherwise.  With 'prefix'
       (also a wildcard pattern), the regex always starts at the
       beginning of the string and has the form
       <prefix>, path separator, anything, <pattern>; 'anchor' is then
       ignored."""

    tail_re = ''
    if pattern:
        tail_re = glob_to_re (pattern)

    if prefix is None:                  # no prefix -- respect anchor flag
        if anchor:
            tail_re = "^" + tail_re
        return re.compile (tail_re)

    # Drop the last character of the translated prefix -- this assumes
    # 'glob_to_re()' ends its result with a single '$'.
    head_re = glob_to_re (prefix)[:-1]
    return re.compile ("^" + os.path.join (head_re, ".*" + tail_re))

# translate_pattern ()
631