sdist.py revision 2b9e43f8d04822abe14d84ce6dc6faf0f6d51c67
1"""distutils.command.sdist
2
3Implements the Distutils 'sdist' command (create a source distribution)."""
4
5# created 1999/09/22, Greg Ward
6
7__revision__ = "$Id$"
8
9import sys, os, string, re
10import fnmatch
11from types import *
12from glob import glob
13from distutils.core import Command
14from distutils.util import \
15     newer, create_tree, remove_tree, make_tarball, make_zipfile, native_path
16from distutils.text_file import TextFile
17from distutils.errors import DistutilsExecError
18
19
class sdist (Command):
    """Distutils command that builds a source distribution: it works out
       the list of files to ship (the manifest) and packs them into one
       archive per requested format."""

    # One-line summary of the command.
    description = "create a source distribution (tarball, zip file, etc.)"

    # Command-line options as (long name, short name, help text) tuples;
    # a trailing '=' on the long name marks an option that takes a value.
    user_options = [
        ('template=', 't',
         "name of manifest template file [default: MANIFEST.in]"),
        ('manifest=', 'm',
         "name of manifest file [default: MANIFEST]"),
        ('use-defaults', None,
         "include the default file set in the manifest "
         "[default; disable with --no-defaults]"),
        ('manifest-only', None,
         "just regenerate the manifest and then stop"),
        ('force-manifest', None,
         "forcibly regenerate the manifest and carry on as usual"),

        ('formats=', None,
         "formats for source distribution (tar, ztar, gztar, or zip)"),
        ('keep-tree', 'k',
         "keep the distribution tree around after creating " +
         "archive file(s)"),
        ]
    # Pairs 'use-defaults' with 'no-defaults' (see the help text above).
    negative_opts = {'use-defaults': 'no-defaults'}

    # Archive format used when '--formats' is not given, keyed by
    # 'os.name'; 'finalize_options()' looks the current platform up here.
    default_format = { 'posix': 'gztar',
                       'nt': 'zip' }

    # NOTE(review): nothing in the visible part of this file refers to
    # 'exclude_re' -- confirm it is still needed.
    exclude_re = re.compile (r'\s*!\s*(\S+)') # for manifest lines
49
50
51    def initialize_options (self):
52        # 'template' and 'manifest' are, respectively, the names of
53        # the manifest template and manifest file.
54        self.template = None
55        self.manifest = None
56
57        # 'use_defaults': if true, we will include the default file set
58        # in the manifest
59        self.use_defaults = 1
60
61        self.manifest_only = 0
62        self.force_manifest = 0
63
64        self.formats = None
65        self.keep_tree = 0
66
67
68    def finalize_options (self):
69        if self.manifest is None:
70            self.manifest = "MANIFEST"
71        if self.template is None:
72            self.template = "MANIFEST.in"
73
74        if self.formats is None:
75            try:
76                self.formats = [self.default_format[os.name]]
77            except KeyError:
78                raise DistutilsPlatformError, \
79                      "don't know how to create source distributions " + \
80                      "on platform %s" % os.name
81        elif type (self.formats) is StringType:
82            self.formats = string.split (self.formats, ',')
83
84
85    def run (self):
86
87        # 'files' is the list of files that will make up the manifest
88        self.files = []
89
90        # Ensure that all required meta-data is given; warn if not (but
91        # don't die, it's not *that* serious!)
92        self.check_metadata ()
93
94        # Do whatever it takes to get the list of files to process
95        # (process the manifest template, read an existing manifest,
96        # whatever).  File list is put into 'self.files'.
97        self.get_file_list ()
98
99        # If user just wanted us to regenerate the manifest, stop now.
100        if self.manifest_only:
101            return
102
103        # Otherwise, go ahead and create the source distribution tarball,
104        # or zipfile, or whatever.
105        self.make_distribution ()
106
107
108    def check_metadata (self):
109
110        dist = self.distribution
111
112        missing = []
113        for attr in ('name', 'version', 'url'):
114            if not (hasattr (dist, attr) and getattr (dist, attr)):
115                missing.append (attr)
116
117        if missing:
118            self.warn ("missing required meta-data: " +
119                       string.join (missing, ", "))
120
121        if dist.author:
122            if not dist.author_email:
123                self.warn ("missing meta-data: if 'author' supplied, " +
124                           "'author_email' must be supplied too")
125        elif dist.maintainer:
126            if not dist.maintainer_email:
127                self.warn ("missing meta-data: if 'maintainer' supplied, " +
128                           "'maintainer_email' must be supplied too")
129        else:
130            self.warn ("missing meta-data: either (author and author_email) " +
131                       "or (maintainer and maintainer_email) " +
132                       "must be supplied")
133
134    # check_metadata ()
135
136
137    def get_file_list (self):
138        """Figure out the list of files to include in the source
139           distribution, and put it in 'self.files'.  This might
140           involve reading the manifest template (and writing the
141           manifest), or just reading the manifest, or just using
142           the default file set -- it all depends on the user's
143           options and the state of the filesystem."""
144
145
146        template_exists = os.path.isfile (self.template)
147        if template_exists:
148            template_newer = newer (self.template, self.manifest)
149
150        # Regenerate the manifest if necessary (or if explicitly told to)
151        if ((template_exists and template_newer) or
152            self.force_manifest or
153            self.manifest_only):
154
155            if not template_exists:
156                self.warn (("manifest template '%s' does not exist " +
157                            "(using default file list)") %
158                           self.template)
159
160            # Add default file set to 'files'
161            if self.use_defaults:
162                self.find_defaults ()
163
164            # Read manifest template if it exists
165            if template_exists:
166                self.read_template ()
167
168            # File list now complete -- sort it so that higher-level files
169            # come first
170            sortable_files = map (os.path.split, self.files)
171            sortable_files.sort ()
172            self.files = []
173            for sort_tuple in sortable_files:
174                self.files.append (apply (os.path.join, sort_tuple))
175
176            # Remove duplicates from the file list
177            for i in range (len(self.files)-1, 0, -1):
178                if self.files[i] == self.files[i-1]:
179                    del self.files[i]
180
181            # And write complete file list (including default file set) to
182            # the manifest.
183            self.write_manifest ()
184
185        # Don't regenerate the manifest, just read it in.
186        else:
187            self.read_manifest ()
188
189    # get_file_list ()
190
191
192    def find_defaults (self):
193
194        standards = [('README', 'README.txt'), 'setup.py']
195        for fn in standards:
196            if type (fn) is TupleType:
197                alts = fn
198                got_it = 0
199                for fn in alts:
200                    if os.path.exists (fn):
201                        got_it = 1
202                        self.files.append (fn)
203                        break
204
205                if not got_it:
206                    self.warn ("standard file not found: should have one of " +
207                               string.join (alts, ', '))
208            else:
209                if os.path.exists (fn):
210                    self.files.append (fn)
211                else:
212                    self.warn ("standard file '%s' not found" % fn)
213
214        optional = ['test/test*.py']
215        for pattern in optional:
216            files = filter (os.path.isfile, glob (pattern))
217            if files:
218                self.files.extend (files)
219
220        if self.distribution.has_pure_modules():
221            build_py = self.find_peer ('build_py')
222            self.files.extend (build_py.get_source_files ())
223
224        if self.distribution.has_ext_modules():
225            build_ext = self.find_peer ('build_ext')
226            self.files.extend (build_ext.get_source_files ())
227
228        if self.distribution.has_c_libraries():
229            build_clib = self.find_peer ('build_clib')
230            self.files.extend (build_clib.get_source_files ())
231
232
233    def search_dir (self, dir, pattern=None):
234        """Recursively find files under 'dir' matching 'pattern' (a string
235           containing a Unix-style glob pattern).  If 'pattern' is None,
236           find all files under 'dir'.  Return the list of found
237           filenames."""
238
239        allfiles = findall (dir)
240        if pattern is None:
241            return allfiles
242
243        pattern_re = translate_pattern (pattern)
244        files = []
245        for file in allfiles:
246            if pattern_re.match (os.path.basename (file)):
247                files.append (file)
248
249        return files
250
251    # search_dir ()
252
253
254    def exclude_pattern (self, pattern):
255        """Remove filenames from 'self.files' that match 'pattern'."""
256        print "exclude_pattern: pattern=%s" % pattern
257        pattern_re = translate_pattern (pattern)
258        for i in range (len (self.files)-1, -1, -1):
259            if pattern_re.match (self.files[i]):
260                print "removing %s" % self.files[i]
261                del self.files[i]
262
263
264    def recursive_exclude_pattern (self, dir, pattern=None):
265        """Remove filenames from 'self.files' that are under 'dir'
266           and whose basenames match 'pattern'."""
267
268        print "recursive_exclude_pattern: dir=%s, pattern=%s" % (dir, pattern)
269        if pattern is None:
270            pattern_re = None
271        else:
272            pattern_re = translate_pattern (pattern)
273
274        for i in range (len (self.files)-1, -1, -1):
275            (cur_dir, cur_base) = os.path.split (self.files[i])
276            if (cur_dir == dir and
277                (pattern_re is None or pattern_re.match (cur_base))):
278                print "removing %s" % self.files[i]
279                del self.files[i]
280
281
    def read_template (self):
        """Read and parse the manifest template file named by
           'self.template' (usually "MANIFEST.in").  Process all file
           specifications (include and exclude) in the manifest template
           and add the resulting filenames to 'self.files'.

           Each template line is split on whitespace; its first word is
           the action:
             include, exclude, global-include, global-exclude <pattern>
             recursive-include, recursive-exclude <dir> <pattern>
             graft, prune <dir_pattern>
           Lines with an unknown action or the wrong number of words are
           reported through 'template.warn()' and skipped.  The
           "include"-type actions select from the list returned by
           'findall()'; the "exclude"-type actions remove entries from
           'self.files'."""

        # 'run()' must have set up 'self.files' as a list already
        assert self.files is not None and type (self.files) is ListType

        # Open the template with comment stripping, blank-line skipping,
        # line joining and whitespace stripping/collapsing switched on.
        template = TextFile (self.template,
                             strip_comments=1,
                             skip_blanks=1,
                             join_lines=1,
                             lstrip_ws=1,
                             rstrip_ws=1,
                             collapse_ws=1)

        # Candidate names for every "include"-type action; computed once
        # for the whole template.
        all_files = findall ()

        while 1:

            line = template.readline()
            if line is None:            # end of file
                break

            words = string.split (line)
            action = words[0]

            # First, check that the right number of words are present
            # for the given action (which is the first word)
            if action in ('include','exclude',
                          'global-include','global-exclude'):
                if len (words) != 2:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects a single <pattern>" %
                         action)
                    continue

                pattern = native_path (words[1])

            elif action in ('recursive-include','recursive-exclude'):
                if len (words) != 3:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects <dir> <pattern>" %
                         action)
                    continue

                (dir, pattern) = map (native_path, words[1:3])

            elif action in ('graft','prune'):
                if len (words) != 2:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects a single <dir_pattern>" %
                         action)
                    continue

                dir_pattern = native_path (words[1])

            else:
                template.warn ("invalid manifest template line: " +
                               "unknown action '%s'" % action)
                continue

            # OK, now we know that the action is valid and we have the
            # right number of words on the line for that action -- so we
            # can proceed with minimal error-checking.  Also, we have
            # defined either (pattern), (dir and pattern), or
            # (dir_pattern) -- so we don't have to spend any time
            # digging stuff up out of 'words'.
            #
            # The "no previously-included ..." warnings below depend on
            # 'exclude_pattern()' returning the number of entries it
            # removed from 'self.files'.

            if action == 'include':
                print "include", pattern
                files = select_pattern (all_files, pattern, anchor=1)
                if not files:
                    template.warn ("no files found matching '%s'" % pattern)
                else:
                    self.files.extend (files)

            elif action == 'exclude':
                print "exclude", pattern
                num = exclude_pattern (self.files, pattern, anchor=1)
                if num == 0:
                    template.warn \
                        ("no previously-included files found matching '%s'" %
                         pattern)

            elif action == 'global-include':
                print "global-include", pattern
                # anchor=0: the pattern need not match from the start of
                # the name
                files = select_pattern (all_files, pattern, anchor=0)
                if not files:
                    template.warn (("no files found matching '%s' " +
                                    "anywhere in distribution") %
                                   pattern)
                else:
                    self.files.extend (files)

            elif action == 'global-exclude':
                print "global-exclude", pattern
                num = exclude_pattern (self.files, pattern, anchor=0)
                if num == 0:
                    template.warn \
                        (("no previously-included files matching '%s' " +
                          "found anywhere in distribution") %
                         pattern)

            elif action == 'recursive-include':
                print "recursive-include", dir, pattern
                files = select_pattern (all_files, pattern, prefix=dir)
                if not files:
                    template.warn (("no files found matching '%s' " +
                                    "under directory '%s'") %
                                   (pattern, dir))
                else:
                    self.files.extend (files)

            elif action == 'recursive-exclude':
                print "recursive-exclude", dir, pattern
                num = exclude_pattern (self.files, pattern, prefix=dir)
                if num == 0:
                    template.warn \
                        (("no previously-included files matching '%s' " +
                          "found under directory '%s'") %
                         (pattern, dir))

            elif action == 'graft':
                print "graft", dir_pattern
                # no file pattern: anything below the prefix qualifies
                files = select_pattern (all_files, None, prefix=dir_pattern)
                if not files:
                    template.warn ("no directories found matching '%s'" %
                                   dir_pattern)
                else:
                    self.files.extend (files)

            elif action == 'prune':
                print "prune", dir_pattern
                num = exclude_pattern (self.files, None, prefix=dir_pattern)
                if num == 0:
                    template.warn \
                        (("no previously-included directories found " +
                          "matching '%s'") %
                         dir_pattern)
            else:
                # unknown actions were already rejected above
                raise RuntimeError, \
                      "this cannot happen: invalid action '%s'" % action

        # while loop over lines of template file

    # read_template ()
432
433
434    def write_manifest (self):
435        """Write the file list in 'self.files' (presumably as filled in
436           by 'find_defaults()' and 'read_template()') to the manifest file
437           named by 'self.manifest'."""
438
439        manifest = open (self.manifest, "w")
440        for fn in self.files:
441            manifest.write (fn + '\n')
442        manifest.close ()
443
444    # write_manifest ()
445
446
447    def read_manifest (self):
448        """Read the manifest file (named by 'self.manifest') and use
449           it to fill in 'self.files', the list of files to include
450           in the source distribution."""
451
452        manifest = open (self.manifest)
453        while 1:
454            line = manifest.readline ()
455            if line == '':              # end of file
456                break
457            if line[-1] == '\n':
458                line = line[0:-1]
459            self.files.append (line)
460
461    # read_manifest ()
462
463
464
465    def make_release_tree (self, base_dir, files):
466
467        # Create all the directories under 'base_dir' necessary to
468        # put 'files' there.
469        create_tree (base_dir, files,
470                     verbose=self.verbose, dry_run=self.dry_run)
471
472        # And walk over the list of files, either making a hard link (if
473        # os.link exists) to each one that doesn't already exist in its
474        # corresponding location under 'base_dir', or copying each file
475        # that's out-of-date in 'base_dir'.  (Usually, all files will be
476        # out-of-date, because by default we blow away 'base_dir' when
477        # we're done making the distribution archives.)
478
479        if hasattr (os, 'link'):        # can make hard links on this system
480            link = 'hard'
481            msg = "making hard links in %s..." % base_dir
482        else:                           # nope, have to copy
483            link = None
484            msg = "copying files to %s..." % base_dir
485
486        self.announce (msg)
487        for file in files:
488            dest = os.path.join (base_dir, file)
489            self.copy_file (file, dest, link=link)
490
491    # make_release_tree ()
492
493
494    def make_distribution (self):
495
496        # Don't warn about missing meta-data here -- should be (and is!)
497        # done elsewhere.
498        base_dir = self.distribution.get_full_name()
499
500        # Remove any files that match "base_dir" from the fileset -- we
501        # don't want to go distributing the distribution inside itself!
502        self.exclude_pattern (base_dir + "*")
503
504        self.make_release_tree (base_dir, self.files)
505        for fmt in self.formats:
506            self.make_archive (base_dir, fmt, base_dir=base_dir)
507
508        if not self.keep_tree:
509            remove_tree (base_dir, self.verbose, self.dry_run)
510
# class sdist
512
513
514# ----------------------------------------------------------------------
515# Utility functions
516
def findall (dir = os.curdir):
    """Find all files under 'dir' and return the list of their names.

       Only files (as judged by 'os.path.isfile()') are returned;
       directories are descended into but never listed themselves, and
       symbolic links to directories are not followed.  Each name
       starts with 'dir', except when 'dir' is 'os.curdir', in which
       case names are relative to the current directory (no leading
       "./")."""

    found = []
    pending = [dir]                     # directories still to be scanned

    while pending:
        current = pending.pop ()

        for name in os.listdir (current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join (current, name)
            else:
                fullname = name

            if os.path.isfile (fullname):
                found.append (fullname)
            elif os.path.isdir (fullname) and not os.path.islink (fullname):
                # a real sub-directory: scan it later, don't report it
                pending.append (fullname)

    return found
540
541
542def select_pattern (files, pattern, anchor=1, prefix=None):
543    """Select strings (presumably filenames) from 'files' that match
544       'pattern', a Unix-style wildcard (glob) pattern.  Patterns are not
545       quite the same as implemented by the 'fnmatch' module: '*' and '?'
546       match non-special characters, where "special" is platform-dependent:
547       slash on Unix, colon, slash, and backslash on DOS/Windows, and colon
548       on Mac OS.
549
550       If 'anchor' is true (the default), then the pattern match is more
551       stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
552       'anchor' is false, both of these will match.
553
554       If 'prefix' is supplied, then only filenames starting with 'prefix'
555       (itself a pattern) and ending with 'pattern', with anything in
556       between them, will match.  'anchor' is ignored in this case.
557
558       Return the list of matching strings, possibly empty."""
559
560    matches = []
561    pattern_re = translate_pattern (pattern, anchor, prefix)
562    print "select_pattern: applying re %s" % pattern_re.pattern
563    for name in files:
564        if pattern_re.search (name):
565            matches.append (name)
566            print " adding", name
567
568    return matches
569
570# select_pattern ()
571
572
573def exclude_pattern (files, pattern, anchor=1, prefix=None):
574
575    pattern_re = translate_pattern (pattern, anchor, prefix)
576    print "exclude_pattern: applying re %s" % pattern_re.pattern
577    for i in range (len(files)-1, -1, -1):
578        if pattern_re.search (files[i]):
579            print " removing", files[i]
580            del files[i]
581
582# exclude_pattern ()
583
584
def glob_to_re (pattern):
    """Translate a shell-like glob pattern to a regular expression;
       return a string containing the regex.  Differs from
       'fnmatch.translate()' in that '*' and '?' do not match "special
       characters" (which are platform-specific)."""
    pattern_re = fnmatch.translate (pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    #
    # The RE is consumed left to right, either as a backslash plus the
    # character it escapes (kept verbatim) or as a bare dot (replaced).
    # That way adjacent wildcards ("??") are all rewritten, and a dot
    # that follows an escaped backslash is still seen as a wildcard.
    # NOTE: a dot inside a bracket expression such as "[.]" is rewritten
    # too; character classes get no special treatment here.
    def restrict (match):
        return match.group (1) or '[^/]'

    return re.sub (r'(?s)(\\.)|\.', restrict, pattern_re)

# glob_to_re ()
603
604
def translate_pattern (pattern, anchor=1, prefix=None):
    """Build and return the compiled regular expression for a shell-like
       wildcard pattern.

       An empty or None 'pattern' contributes nothing to the expression.
       Without 'prefix', the expression is anchored at the start of the
       string only when 'anchor' is true.  With 'prefix' (itself a glob
       pattern), the expression is "start of string, prefix, anything,
       pattern", and 'anchor' is not consulted."""

    tail_re = ''
    if pattern:
        tail_re = glob_to_re (pattern)

    # no prefix -- respect anchor flag
    if prefix is None:
        if anchor:
            tail_re = "^" + tail_re
        return re.compile (tail_re)

    # Drop the last character of the translated prefix -- the '$' that
    # is expected to end it -- so that more can follow.
    # NOTE(review): this assumes 'glob_to_re()' output always ends in a
    # single '$', and that joining RE fragments with 'os.path.join()' is
    # harmless on this platform -- confirm.
    head_re = glob_to_re (prefix)
    head_re = head_re[:-1]
    return re.compile ("^" + os.path.join (head_re, ".*" + tail_re))

# translate_pattern ()
624