# sdist.py revision 4a7319ca286e78ae9ddf9f86a50eee3eab813855
"""distutils.command.sdist

Implements the Distutils 'sdist' command (create a source distribution)."""

# created 1999/09/22, Greg Ward

__revision__ = "$Id$"

import sys, os, string, re
import fnmatch
from types import *
from glob import glob
from distutils.core import Command
from distutils.util import \
     convert_path, create_tree, remove_tree, newer, write_file, \
     check_archive_formats, ARCHIVE_FORMATS
from distutils.text_file import TextFile
# DistutilsPlatformError is raised by 'finalize_options()' below, so it
# has to be imported along with the other error classes.
from distutils.errors import \
     DistutilsExecError, DistutilsOptionError, DistutilsPlatformError


class sdist(Command):
    """Command that builds a source distribution.

    The list of files to distribute is computed from a default file set
    and/or a manifest template ('self.template'), written to a manifest
    file ('self.manifest'), copied or hard-linked into a release tree named
    after the distribution, and finally packed into one archive per
    requested format.
    """

    description = "create a source distribution (tarball, zip file, etc.)"

    user_options = [
        ('template=', 't',
         "name of manifest template file [default: MANIFEST.in]"),
        ('manifest=', 'm',
         "name of manifest file [default: MANIFEST]"),
        ('use-defaults', None,
         "include the default file set in the manifest "
         "[default; disable with --no-defaults]"),
        ('manifest-only', 'o',
         "just regenerate the manifest and then stop "
         "(implies --force-manifest)"),
        ('force-manifest', 'f',
         "forcibly regenerate the manifest and carry on as usual"),
        ('formats=', None,
         "formats for source distribution"),
        ('keep-tree', 'k',
         "keep the distribution tree around after creating " +
         "archive file(s)"),
        ]


    # XXX ugh: this has to precede the 'help_options' list, because
    # it is mentioned there -- also, this is not a method, even though
    # it's defined in a class: double-ugh!
    def show_formats():
        """Print all possible values for the 'formats' option -- used by
        the "--help-formats" command-line option.

        One pseudo-option "formats=<name>" is built per key of
        ARCHIVE_FORMATS, with the third element of the corresponding entry
        as its help text; the sorted list is printed through FancyGetopt.
        """
        from distutils.fancy_getopt import FancyGetopt
        formats = []
        for fmt in ARCHIVE_FORMATS.keys():
            formats.append(("formats=" + fmt, None,
                            ARCHIVE_FORMATS[fmt][2]))
        formats.sort()
        pretty_printer = FancyGetopt(formats)
        pretty_printer.print_help(
            "List of available source distribution formats:")

    help_options = [
        ('help-formats', None,
         "lists available distribution formats", show_formats),
        ]

    negative_opts = {'use-defaults': 'no-defaults'}

    # Archive format used when the user does not supply 'formats',
    # keyed on 'os.name'.
    default_format = { 'posix': 'gztar',
                       'nt': 'zip' }


    def initialize_options(self):
        """Set every option of the command to its "not yet specified"
        value; real defaults are filled in by 'finalize_options()'.
        """
        # 'template' and 'manifest' are, respectively, the names of
        # the manifest template and manifest file.
        self.template = None
        self.manifest = None

        # 'use_defaults': if true, we will include the default file set
        # in the manifest
        self.use_defaults = 1

        self.manifest_only = 0
        self.force_manifest = 0

        self.formats = None
        self.keep_tree = 0

        # List of archive files created by 'make_distribution()'; stays
        # None until the command has run.
        self.archive_files = None


    def finalize_options(self):
        """Fill in defaults for the manifest, template and formats
        options and validate the requested archive formats.

        Raises DistutilsPlatformError if no formats were given and there
        is no default format for 'os.name'; raises DistutilsOptionError
        if 'check_archive_formats()' reports an unknown format.
        """
        if self.manifest is None:
            self.manifest = "MANIFEST"
        if self.template is None:
            self.template = "MANIFEST.in"

        self.ensure_string_list('formats')
        if self.formats is None:
            try:
                self.formats = [self.default_format[os.name]]
            except KeyError:
                raise DistutilsPlatformError(
                    "don't know how to create source distributions " +
                    "on platform %s" % os.name)

        bad_format = check_archive_formats(self.formats)
        if bad_format:
            raise DistutilsOptionError(
                "unknown archive format '%s'" % bad_format)


    def run(self):
        """Check the meta-data, compute the file list and -- unless only
        the manifest was asked for -- build the distribution archives.
        """
        # 'files' is the list of files that will make up the manifest
        self.files = []

        # Ensure that all required meta-data is given; warn if not (but
        # don't die, it's not *that* serious!)
        self.check_metadata()

        # Do whatever it takes to get the list of files to process
        # (process the manifest template, read an existing manifest,
        # whatever).  File list is put into 'self.files'.
        self.get_file_list()

        # If user just wanted us to regenerate the manifest, stop now.
        if self.manifest_only:
            return

        # Otherwise, go ahead and create the source distribution tarball,
        # or zipfile, or whatever.
        self.make_distribution()


    def check_metadata(self):
        """Ensure that all required elements of meta-data (name, version,
        URL, (author and author_email) or (maintainer and
        maintainer_email)) are supplied by the Distribution object; warn if
        any are missing.  Nothing is raised: problems are only reported
        through 'self.warn()'.
        """
        metadata = self.distribution.metadata

        missing = []
        for attr in ('name', 'version', 'url'):
            if not (hasattr(metadata, attr) and getattr(metadata, attr)):
                missing.append(attr)

        if missing:
            self.warn("missing required meta-data: " +
                      string.join(missing, ", "))

        if metadata.author:
            if not metadata.author_email:
                self.warn("missing meta-data: if 'author' supplied, " +
                          "'author_email' must be supplied too")
        elif metadata.maintainer:
            if not metadata.maintainer_email:
                self.warn("missing meta-data: if 'maintainer' supplied, " +
                          "'maintainer_email' must be supplied too")
        else:
            self.warn("missing meta-data: either (author and author_email) " +
                      "or (maintainer and maintainer_email) " +
                      "must be supplied")

    # check_metadata ()


    def get_file_list(self):
        """Figure out the list of files to include in the source
        distribution, and put it in 'self.files'.  This might involve
        reading the manifest template (and writing the manifest), or just
        reading the manifest, or just using the default file set -- it all
        depends on the user's options and the state of the filesystem.

        The manifest is regenerated when the template exists and is newer
        than the manifest, or when 'force_manifest' or 'manifest_only' is
        set; otherwise the existing manifest is simply read.
        """
        template_exists = os.path.isfile(self.template)
        # Only ask 'newer()' about a template that is really there.
        template_newer = (template_exists and
                          newer(self.template, self.manifest))

        # Regenerate the manifest if necessary (or if explicitly told to)
        if (template_newer or
            self.force_manifest or
            self.manifest_only):

            if not template_exists:
                self.warn(("manifest template '%s' does not exist " +
                           "(using default file list)") %
                          self.template)

            # Add default file set to 'files'
            if self.use_defaults:
                self.add_defaults()

            # Read manifest template if it exists
            if template_exists:
                self.read_template()

            # File list now complete -- sort it so that higher-level files
            # come first (sorting on the (directory, basename) pairs)
            sortable_files = map(os.path.split, self.files)
            sortable_files.sort()
            self.files = []
            for sort_tuple in sortable_files:
                self.files.append(apply(os.path.join, sort_tuple))

            # Remove duplicates from the file list; the list is sorted, so
            # duplicates are adjacent, and walking backwards keeps the
            # remaining indices valid while deleting.
            for i in range(len(self.files)-1, 0, -1):
                if self.files[i] == self.files[i-1]:
                    del self.files[i]

            # And write complete file list (including default file set) to
            # the manifest.
            self.write_manifest()

        # Don't regenerate the manifest, just read it in.
        else:
            self.read_manifest()

    # get_file_list ()


    def add_defaults(self):
        """Add all the default files to self.files:
          - README or README.txt
          - setup.py
          - test/test*.py
          - all pure Python modules mentioned in setup script
          - all C sources listed as part of extensions or C libraries
            in the setup script (doesn't catch C headers!)
        Warns if (README or README.txt) or setup.py are missing; everything
        else is optional.
        """
        # A tuple entry means "any one of these alternatives will do".
        standards = [('README', 'README.txt'), 'setup.py']
        for fn in standards:
            if type(fn) is TupleType:
                alts = fn
                got_it = 0
                for alt in alts:
                    if os.path.exists(alt):
                        got_it = 1
                        self.files.append(alt)
                        break

                if not got_it:
                    self.warn("standard file not found: should have one of " +
                              string.join(alts, ', '))
            else:
                if os.path.exists(fn):
                    self.files.append(fn)
                else:
                    self.warn("standard file '%s' not found" % fn)

        optional = ['test/test*.py']
        for pattern in optional:
            files = filter(os.path.isfile, glob(pattern))
            if files:
                self.files.extend(files)

        if self.distribution.has_pure_modules():
            build_py = self.get_finalized_command('build_py')
            self.files.extend(build_py.get_source_files())

        if self.distribution.has_ext_modules():
            build_ext = self.get_finalized_command('build_ext')
            self.files.extend(build_ext.get_source_files())

        if self.distribution.has_c_libraries():
            build_clib = self.get_finalized_command('build_clib')
            self.files.extend(build_clib.get_source_files())

    # add_defaults ()


    def search_dir(self, dir, pattern=None):
        """Recursively find files under 'dir' matching 'pattern' (a string
        containing a Unix-style glob pattern).  If 'pattern' is None, find
        all files under 'dir'.  Return the list of found filenames.

        Only the basename of each candidate is matched against 'pattern'.
        """
        allfiles = findall(dir)
        if pattern is None:
            return allfiles

        pattern_re = translate_pattern(pattern)
        files = []
        for file in allfiles:
            if pattern_re.match(os.path.basename(file)):
                files.append(file)

        return files

    # search_dir ()


    def recursive_exclude_pattern(self, dir, pattern=None):
        """Remove filenames from 'self.files' whose directory part is
        exactly 'dir' and whose basenames match 'pattern'.  If 'pattern'
        is None, every file whose directory part is 'dir' is removed.
        """
        self.debug_print("recursive_exclude_pattern: dir=%s, pattern=%s" %
                         (dir, pattern))
        if pattern is None:
            pattern_re = None
        else:
            pattern_re = translate_pattern(pattern)

        # Walk backwards so deletions don't disturb pending indices.
        for i in range(len(self.files)-1, -1, -1):
            (cur_dir, cur_base) = os.path.split(self.files[i])
            if (cur_dir == dir and
                (pattern_re is None or pattern_re.match(cur_base))):
                self.debug_print("removing %s" % self.files[i])
                del self.files[i]


    def read_template(self):
        """Read and parse the manifest template file named by
        'self.template' (usually "MANIFEST.in").  Process all file
        specifications (include and exclude) in the manifest template and
        update 'self.files' accordingly (filenames may be added to
        or removed from 'self.files' based on the manifest template).

        Recognised actions are 'include', 'exclude', 'global-include',
        'global-exclude', 'recursive-include', 'recursive-exclude',
        'graft' and 'prune'.  Malformed lines and patterns that match
        nothing are reported with 'template.warn()' and otherwise ignored.
        After the template has been processed, everything under the build
        base directory and under the distribution's own directory is
        removed from 'self.files'.
        """
        assert self.files is not None and type(self.files) is ListType
        self.announce("reading manifest template '%s'" % self.template)

        template = TextFile(self.template,
                            strip_comments=1,
                            skip_blanks=1,
                            join_lines=1,
                            lstrip_ws=1,
                            rstrip_ws=1,
                            collapse_ws=1)

        # Candidate names for all the "include"-style actions.
        all_files = findall()

        while 1:

            line = template.readline()
            if line is None:            # end of file
                break

            words = string.split(line)
            action = words[0]

            # First, check that the right number of words are present
            # for the given action (which is the first word)
            if action in ('include','exclude',
                          'global-include','global-exclude'):
                if len(words) < 2:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects <pattern1> <pattern2> ..." %
                         action)
                    continue

                pattern_list = map(convert_path, words[1:])

            elif action in ('recursive-include','recursive-exclude'):
                if len(words) < 3:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects <dir> <pattern1> <pattern2> ..." %
                         action)
                    continue

                dir = convert_path(words[1])
                pattern_list = map(convert_path, words[2:])

            elif action in ('graft','prune'):
                if len(words) != 2:
                    template.warn \
                        ("invalid manifest template line: " +
                         "'%s' expects a single <dir_pattern>" %
                         action)
                    continue

                dir_pattern = convert_path(words[1])

            else:
                template.warn("invalid manifest template line: " +
                              "unknown action '%s'" % action)
                continue

            # OK, now we know that the action is valid and we have the
            # right number of words on the line for that action -- so we
            # can proceed with minimal error-checking.  Also, we have
            # defined either (pattern), (dir and pattern), or
            # (dir_pattern) -- so we don't have to spend any time
            # digging stuff up out of 'words'.

            if action == 'include':
                self.debug_print("include " + string.join(pattern_list))
                for pattern in pattern_list:
                    files = self.select_pattern(all_files, pattern, anchor=1)
                    if not files:
                        template.warn("no files found matching '%s'" %
                                      pattern)
                    else:
                        self.files.extend(files)

            elif action == 'exclude':
                self.debug_print("exclude " + string.join(pattern_list))
                for pattern in pattern_list:
                    num = self.exclude_pattern(self.files, pattern, anchor=1)
                    if num == 0:
                        template.warn(
                            "no previously-included files found matching '%s'"%
                            pattern)

            elif action == 'global-include':
                self.debug_print("global-include " + string.join(pattern_list))
                for pattern in pattern_list:
                    files = self.select_pattern(all_files, pattern, anchor=0)
                    if not files:
                        template.warn(("no files found matching '%s' " +
                                       "anywhere in distribution") %
                                      pattern)
                    else:
                        self.files.extend(files)

            elif action == 'global-exclude':
                self.debug_print("global-exclude " + string.join(pattern_list))
                for pattern in pattern_list:
                    num = self.exclude_pattern(self.files, pattern, anchor=0)
                    if num == 0:
                        template.warn \
                            (("no previously-included files matching '%s' " +
                              "found anywhere in distribution") %
                             pattern)

            elif action == 'recursive-include':
                self.debug_print("recursive-include %s %s" %
                                 (dir, string.join(pattern_list)))
                for pattern in pattern_list:
                    files = self.select_pattern(
                        all_files, pattern, prefix=dir)
                    if not files:
                        template.warn(("no files found matching '%s' " +
                                       "under directory '%s'") %
                                      (pattern, dir))
                    else:
                        self.files.extend(files)

            elif action == 'recursive-exclude':
                self.debug_print("recursive-exclude %s %s" %
                                 (dir, string.join(pattern_list)))
                for pattern in pattern_list:
                    num = self.exclude_pattern(
                        self.files, pattern, prefix=dir)
                    if num == 0:
                        template.warn \
                            (("no previously-included files matching '%s' " +
                              "found under directory '%s'") %
                             (pattern, dir))

            elif action == 'graft':
                self.debug_print("graft " + dir_pattern)
                files = self.select_pattern(
                    all_files, None, prefix=dir_pattern)
                if not files:
                    template.warn("no directories found matching '%s'" %
                                  dir_pattern)
                else:
                    self.files.extend(files)

            elif action == 'prune':
                self.debug_print("prune " + dir_pattern)
                num = self.exclude_pattern(
                    self.files, None, prefix=dir_pattern)
                if num == 0:
                    template.warn \
                        (("no previously-included directories found " +
                          "matching '%s'") %
                         dir_pattern)
            else:
                raise RuntimeError(
                    "this cannot happen: invalid action '%s'" % action)

        # while loop over lines of template file

        # Prune away the build and source distribution directories
        build = self.get_finalized_command('build')
        self.exclude_pattern(self.files, None, prefix=build.build_base)

        base_dir = self.distribution.get_fullname()
        self.exclude_pattern(self.files, None, prefix=base_dir)

    # read_template ()


    def select_pattern(self, files, pattern, anchor=1, prefix=None):
        """Select strings (presumably filenames) from 'files' that match
        'pattern', a Unix-style wildcard (glob) pattern.  Patterns are not
        quite the same as implemented by the 'fnmatch' module: '*' and '?'
        match non-special characters, where "special" is platform-dependent:
        slash on Unix, colon, slash, and backslash on DOS/Windows, and colon on
        Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        Return the list of matching strings, possibly empty.  'files'
        itself is not modified.
        """
        matches = []
        pattern_re = translate_pattern(pattern, anchor, prefix)
        self.debug_print("select_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        for name in files:
            if pattern_re.search(name):
                matches.append(name)
                self.debug_print(" adding " + name)

        return matches

    # select_pattern ()


    def exclude_pattern(self, files, pattern, anchor=1, prefix=None):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  'pattern', 'anchor', and 'prefix' are the same
        as for 'select_pattern()', above.  The list 'files' is modified
        in place.

        Return the number of entries removed, so that callers can tell
        (and warn) when a pattern did not match anything.
        """
        pattern_re = translate_pattern(pattern, anchor, prefix)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        num_removed = 0
        # Walk backwards so deletions don't disturb pending indices.
        for i in range(len(files)-1, -1, -1):
            if pattern_re.search(files[i]):
                self.debug_print(" removing " + files[i])
                del files[i]
                num_removed = num_removed + 1

        return num_removed

    # exclude_pattern ()


    def write_manifest(self):
        """Write the file list in 'self.files' (presumably as filled in by
        'add_defaults()' and 'read_template()') to the manifest file named
        by 'self.manifest'.  The write goes through 'self.execute()'.
        """
        self.execute(write_file,
                     (self.manifest, self.files),
                     "writing manifest file '%s'" % self.manifest)

    # write_manifest ()


    def read_manifest(self):
        """Read the manifest file (named by 'self.manifest') and use it to
        fill in 'self.files', the list of files to include in the source
        distribution.  Each line of the manifest, minus its trailing
        newline, is appended to 'self.files' as one filename.
        """
        self.announce("reading manifest file '%s'" % self.manifest)
        manifest = open(self.manifest)
        # Make sure the file is closed even if reading fails part-way.
        try:
            while 1:
                line = manifest.readline()
                if line == '':          # end of file
                    break
                if line[-1] == '\n':
                    line = line[0:-1]
                self.files.append(line)
        finally:
            manifest.close()

    # read_manifest ()


    def make_release_tree(self, base_dir, files):
        """Create the directory tree that will become the source
        distribution archive.  All directories implied by the filenames in
        'files' are created under 'base_dir', and then we hard link or copy
        (if hard linking is unavailable) those files into place.
        Essentially, this duplicates the developer's source tree, but in a
        directory named after the distribution, containing only the files
        to be distributed.
        """
        # Create all the directories under 'base_dir' necessary to
        # put 'files' there.
        create_tree(base_dir, files,
                    verbose=self.verbose, dry_run=self.dry_run)

        # And walk over the list of files, either making a hard link (if
        # os.link exists) to each one that doesn't already exist in its
        # corresponding location under 'base_dir', or copying each file
        # that's out-of-date in 'base_dir'.  (Usually, all files will be
        # out-of-date, because by default we blow away 'base_dir' when
        # we're done making the distribution archives.)

        if hasattr(os, 'link'):         # can make hard links on this system
            link = 'hard'
            msg = "making hard links in %s..." % base_dir
        else:                           # nope, have to copy
            link = None
            msg = "copying files to %s..." % base_dir

        self.announce(msg)
        for file in files:
            dest = os.path.join(base_dir, file)
            self.copy_file(file, dest, link=link)

    # make_release_tree ()


    def make_distribution(self):
        """Create the source distribution(s).  First, we create the release
        tree with 'make_release_tree()'; then, we create all required
        archive files (according to 'self.formats') from the release tree.
        Finally, we clean up by blowing away the release tree (unless
        'self.keep_tree' is true).  The list of archive files created is
        stored so it can be retrieved later by 'get_archive_files()'.
        """
        # Don't warn about missing meta-data here -- should be (and is!)
        # done elsewhere.
        base_dir = self.distribution.get_fullname()

        # Remove any files that match "base_dir" from the fileset -- we
        # don't want to go distributing the distribution inside itself!
        self.exclude_pattern(self.files, base_dir + "*")

        self.make_release_tree(base_dir, self.files)
        archive_files = []              # remember names of files we create
        for fmt in self.formats:
            file = self.make_archive(base_dir, fmt, base_dir=base_dir)
            archive_files.append(file)

        self.archive_files = archive_files

        if not self.keep_tree:
            remove_tree(base_dir, self.verbose, self.dry_run)

    def get_archive_files(self):
        """Return the list of archive files created when the command
        was run, or None if the command hasn't run yet.
        """
        return self.archive_files

# class sdist


# ----------------------------------------------------------------------
# Utility functions

def findall(dir = os.curdir):
    """Find everything under 'dir' and return the list of names found.

    Names are built by joining 'dir' with each directory entry, except
    that entries directly under 'os.curdir' are returned bare (no leading
    "./").  Directories that are not symbolic links are descended into.

    NOTE(review): directory names themselves are appended to the result
    as well as regular files -- confirm that callers expect this.
    """
    found = []
    stack = [dir]
    pop = stack.pop
    push = stack.append

    while stack:
        dir = pop()
        names = os.listdir(dir)

        for name in names:
            if dir != os.curdir:        # avoid the dreaded "./" syndrome
                fullname = os.path.join(dir, name)
            else:
                fullname = name
            found.append(fullname)
            if os.path.isdir(fullname) and not os.path.islink(fullname):
                push(fullname)

    return found


def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' does not match "special characters" (which are
    platform-specific).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    pattern_re = re.sub(r'(^|[^\\])\.', r'\1[^/]', pattern_re)
    return pattern_re

# glob_to_re ()


def translate_pattern(pattern, anchor=1, prefix=None):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.

    A false 'pattern' (None or empty) contributes an empty regex.  If
    'prefix' is given, the result matches names that start with 'prefix'
    (itself a glob pattern) followed by anything and then 'pattern';
    'anchor' is ignored in that case.  Without a prefix, a true 'anchor'
    pins the pattern to the start of the name.
    """
    if pattern:
        pattern_re = glob_to_re(pattern)
    else:
        pattern_re = ''

    if prefix is not None:
        # NOTE(review): this assumes the regex from 'glob_to_re()' ends
        # in a single '$' -- confirm against the 'fnmatch.translate()'
        # of the targeted Python version.
        prefix_re = (glob_to_re(prefix))[0:-1] # ditch trailing $
        pattern_re = "^" + os.path.join(prefix_re, ".*" + pattern_re)
    else:                               # no prefix -- respect anchor flag
        if anchor:
            pattern_re = "^" + pattern_re

    return re.compile(pattern_re)

# translate_pattern ()