sdist.py revision 1b8e1d4c0dd1a1d738ffdac053446117f6d70656
"""distutils.command.sdist

Implements the Distutils 'sdist' command (create a source distribution)."""

# created 1999/09/22, Greg Ward

__revision__ = "$Id$"

import sys, os, string, re
import fnmatch
from types import *
from glob import glob
from distutils.core import Command
from distutils.util import newer, create_tree, remove_tree, native_path, \
     write_file
from distutils.archive_util import check_archive_formats
from distutils.text_file import TextFile
from distutils.errors import DistutilsExecError, DistutilsOptionError, \
     DistutilsPlatformError


class sdist(Command):
    """The 'sdist' command: work out the list of files that make up the
    source distribution (from built-in defaults, the manifest template,
    or an existing manifest), then copy or link them into a release
    tree and build one archive per requested format."""

    description = "create a source distribution (tarball, zip file, etc.)"

    user_options = [
        ('template=', 't',
         "name of manifest template file [default: MANIFEST.in]"),
        ('manifest=', 'm',
         "name of manifest file [default: MANIFEST]"),
        ('use-defaults', None,
         "include the default file set in the manifest "
         "[default; disable with --no-defaults]"),
        ('manifest-only', None,
         "just regenerate the manifest and then stop"),
        ('force-manifest', None,
         "forcibly regenerate the manifest and carry on as usual"),
        ('formats=', None,
         "formats for source distribution (tar, ztar, gztar, bztar, or zip)"),
        ('keep-tree', 'k',
         "keep the distribution tree around after creating " +
         "archive file(s)"),
        ]
    negative_opts = {'use-defaults': 'no-defaults'}

    # Archive format used when '--formats' is not given, keyed by 'os.name'.
    default_format = { 'posix': 'gztar',
                       'nt': 'zip' }

    exclude_re = re.compile(r'\s*!\s*(\S+)')    # for manifest lines


    def initialize_options(self):
        """Set every option to its "not yet specified" value."""
        # 'template' and 'manifest' are, respectively, the names of
        # the manifest template and manifest file.
        self.template = None
        self.manifest = None

        # 'use_defaults': if true, we will include the default file set
        # in the manifest
        self.use_defaults = 1

        self.manifest_only = 0
        self.force_manifest = 0

        self.formats = None
        self.keep_tree = 0


    def finalize_options(self):
        """Fill in defaults for unset options and validate 'formats'.

        Raises DistutilsPlatformError if no format was requested and
        there is no default format for this platform ('os.name'), and
        DistutilsOptionError if 'check_archive_formats()' reports an
        unknown archive format."""
        if self.manifest is None:
            self.manifest = "MANIFEST"
        if self.template is None:
            self.template = "MANIFEST.in"

        if self.formats is None:
            try:
                self.formats = [self.default_format[os.name]]
            except KeyError:
                raise DistutilsPlatformError(
                    "don't know how to create source distributions " +
                    "on platform %s" % os.name)
        elif type(self.formats) is StringType:
            # a comma-separated string (as given by '--formats')
            self.formats = string.split(self.formats, ',')

        bad_format = check_archive_formats(self.formats)
        if bad_format:
            raise DistutilsOptionError(
                "unknown archive format '%s'" % bad_format)


    def run(self):
        """Check the meta-data, build the file list, and (unless only
        the manifest was asked for) create the distribution archives."""

        # 'files' is the list of files that will make up the manifest
        self.files = []

        # Ensure that all required meta-data is given; warn if not (but
        # don't die, it's not *that* serious!)
        self.check_metadata()

        # Do whatever it takes to get the list of files to process
        # (process the manifest template, read an existing manifest,
        # whatever).  File list is put into 'self.files'.
        self.get_file_list()

        # If user just wanted us to regenerate the manifest, stop now.
        if self.manifest_only:
            return

        # Otherwise, go ahead and create the source distribution tarball,
        # or zipfile, or whatever.
        self.make_distribution()


    def check_metadata(self):
        """Warn (without failing) about missing distribution meta-data:
        'name', 'version' and 'url' must be set, and either 'author' or
        'maintainer' must be supplied together with its e-mail address."""

        metadata = self.distribution.metadata

        missing = []
        for attr in ('name', 'version', 'url'):
            if not (hasattr(metadata, attr) and getattr(metadata, attr)):
                missing.append(attr)

        if missing:
            self.warn("missing required meta-data: " +
                      string.join(missing, ", "))

        if metadata.author:
            if not metadata.author_email:
                self.warn("missing meta-data: if 'author' supplied, " +
                          "'author_email' must be supplied too")
        elif metadata.maintainer:
            if not metadata.maintainer_email:
                self.warn("missing meta-data: if 'maintainer' supplied, " +
                          "'maintainer_email' must be supplied too")
        else:
            self.warn("missing meta-data: either (author and author_email) " +
                      "or (maintainer and maintainer_email) " +
                      "must be supplied")

    # check_metadata ()


    def get_file_list(self):
        """Figure out the list of files to include in the source
        distribution, and put it in 'self.files'.  This might
        involve reading the manifest template (and writing the
        manifest), or just reading the manifest, or just using
        the default file set -- it all depends on the user's
        options and the state of the filesystem."""

        template_exists = os.path.isfile(self.template)
        if template_exists:
            template_newer = newer(self.template, self.manifest)
        else:
            template_newer = 0

        # With neither a template nor a manifest on disk there is nothing
        # to read, so the manifest has to be generated (from the default
        # file set) rather than opened.
        manifest_exists = os.path.isfile(self.manifest)
        neither_exists = not template_exists and not manifest_exists

        # Regenerate the manifest if necessary (or if explicitly told to)
        if ((template_exists and template_newer) or
            neither_exists or
            self.force_manifest or
            self.manifest_only):

            if not template_exists:
                self.warn(("manifest template '%s' does not exist " +
                           "(using default file list)") %
                          self.template)

            # Add default file set to 'files'
            if self.use_defaults:
                self.find_defaults()

            # Read manifest template if it exists
            if template_exists:
                self.read_template()

            # File list now complete -- sort it so that higher-level files
            # come first
            sortable_files = map(os.path.split, self.files)
            sortable_files.sort()
            self.files = []
            for sort_tuple in sortable_files:
                self.files.append(apply(os.path.join, sort_tuple))

            # Remove duplicates from the file list (the list is sorted,
            # so duplicates are adjacent; walk backwards so deleting an
            # entry doesn't disturb the indices still to be visited)
            for i in range(len(self.files)-1, 0, -1):
                if self.files[i] == self.files[i-1]:
                    del self.files[i]

            # And write complete file list (including default file set) to
            # the manifest.
            self.write_manifest()

        # Don't regenerate the manifest, just read it in.
        else:
            self.read_manifest()

    # get_file_list ()


    def find_defaults(self):
        """Add the default file set to 'self.files': the standard files
        (a README and setup.py, warning about any that are absent), any
        files matching the optional patterns, and the source files
        reported by the 'build_py', 'build_ext' and 'build_clib'
        commands when the distribution has the corresponding kind of
        content."""

        # A tuple entry lists alternatives: the first one that exists
        # is used.
        standards = [('README', 'README.txt'), 'setup.py']
        for fn in standards:
            if type(fn) is TupleType:
                alts = fn
                got_it = 0
                for alt in alts:
                    if os.path.exists(alt):
                        got_it = 1
                        self.files.append(alt)
                        break

                if not got_it:
                    self.warn("standard file not found: should have one of " +
                              string.join(alts, ', '))
            else:
                if os.path.exists(fn):
                    self.files.append(fn)
                else:
                    self.warn("standard file '%s' not found" % fn)

        optional = ['test/test*.py']
        for pattern in optional:
            files = filter(os.path.isfile, glob(pattern))
            if files:
                self.files.extend(files)

        if self.distribution.has_pure_modules():
            build_py = self.find_peer('build_py')
            self.files.extend(build_py.get_source_files())

        if self.distribution.has_ext_modules():
            build_ext = self.find_peer('build_ext')
            self.files.extend(build_ext.get_source_files())

        if self.distribution.has_c_libraries():
            build_clib = self.find_peer('build_clib')
            self.files.extend(build_clib.get_source_files())


    def search_dir(self, dir, pattern=None):
        """Recursively find files under 'dir' matching 'pattern' (a string
        containing a Unix-style glob pattern).  If 'pattern' is None,
        find all files under 'dir'.  The pattern is matched against each
        file's basename only.  Return the list of found filenames."""

        allfiles = findall(dir)
        if pattern is None:
            return allfiles

        pattern_re = translate_pattern(pattern)
        files = []
        for file in allfiles:
            if pattern_re.match(os.path.basename(file)):
                files.append(file)

        return files

    # search_dir ()


    def exclude_pattern(self, pattern):
        """Remove filenames from 'self.files' that match 'pattern'."""
        print("exclude_pattern: pattern=%s" % pattern)
        pattern_re = translate_pattern(pattern)
        # walk backwards so deletions don't shift the entries still to visit
        for i in range(len(self.files)-1, -1, -1):
            if pattern_re.match(self.files[i]):
                print("removing %s" % self.files[i])
                del self.files[i]


    def recursive_exclude_pattern(self, dir, pattern=None):
        """Remove filenames from 'self.files' whose directory part is
        exactly 'dir' and whose basenames match 'pattern' (every file in
        'dir' if 'pattern' is None)."""

        print("recursive_exclude_pattern: dir=%s, pattern=%s" %
              (dir, pattern))
        if pattern is None:
            pattern_re = None
        else:
            pattern_re = translate_pattern(pattern)

        for i in range(len(self.files)-1, -1, -1):
            (cur_dir, cur_base) = os.path.split(self.files[i])
            if (cur_dir == dir and
                (pattern_re is None or pattern_re.match(cur_base))):
                print("removing %s" % self.files[i])
                del self.files[i]


    def read_template(self):
        """Read and parse the manifest template file named by
        'self.template' (usually "MANIFEST.in").  Process all file
        specifications (include and exclude) in the manifest template
        and add the resulting filenames to 'self.files'."""

        assert self.files is not None and type(self.files) is ListType

        template = TextFile(self.template,
                            strip_comments=1,
                            skip_blanks=1,
                            join_lines=1,
                            lstrip_ws=1,
                            rstrip_ws=1,
                            collapse_ws=1)

        # Candidate files for the "include" style actions: everything
        # found under the current directory.
        all_files = findall()

        while 1:

            line = template.readline()
            if line is None:            # end of file
                break

            words = string.split(line)
            action = words[0]

            # First, check that the right number of words are present
            # for the given action (which is the first word)
            if action in ('include','exclude',
                          'global-include','global-exclude'):
                if len(words) < 2:
                    template.warn(
                        "invalid manifest template line: " +
                        "'%s' expects <pattern1> <pattern2> ..." %
                        action)
                    continue

                pattern_list = map(native_path, words[1:])

            elif action in ('recursive-include','recursive-exclude'):
                if len(words) < 3:
                    template.warn(
                        "invalid manifest template line: " +
                        "'%s' expects <dir> <pattern1> <pattern2> ..." %
                        action)
                    continue

                dir = native_path(words[1])
                pattern_list = map(native_path, words[2:])

            elif action in ('graft','prune'):
                if len(words) != 2:
                    template.warn(
                        "invalid manifest template line: " +
                        "'%s' expects a single <dir_pattern>" %
                        action)
                    continue

                dir_pattern = native_path(words[1])

            else:
                template.warn("invalid manifest template line: " +
                              "unknown action '%s'" % action)
                continue

            # OK, now we know that the action is valid and we have the
            # right number of words on the line for that action -- so we
            # can proceed with minimal error-checking.  Also, we have
            # defined either (pattern), (dir and pattern), or
            # (dir_pattern) -- so we don't have to spend any time
            # digging stuff up out of 'words'.

            if action == 'include':
                print("include " + string.join(pattern_list))
                for pattern in pattern_list:
                    files = select_pattern(all_files, pattern, anchor=1)
                    if not files:
                        template.warn("no files found matching '%s'" %
                                      pattern)
                    else:
                        self.files.extend(files)

            elif action == 'exclude':
                print("exclude " + string.join(pattern_list))
                for pattern in pattern_list:
                    num = exclude_pattern(self.files, pattern, anchor=1)
                    if num == 0:
                        template.warn(
                            "no previously-included files found matching '%s'"%
                            pattern)

            elif action == 'global-include':
                print("global-include " + string.join(pattern_list))
                for pattern in pattern_list:
                    files = select_pattern(all_files, pattern, anchor=0)
                    if not files:
                        template.warn(("no files found matching '%s' " +
                                       "anywhere in distribution") %
                                      pattern)
                    else:
                        self.files.extend(files)

            elif action == 'global-exclude':
                print("global-exclude " + string.join(pattern_list))
                for pattern in pattern_list:
                    num = exclude_pattern(self.files, pattern, anchor=0)
                    if num == 0:
                        template.warn(
                            ("no previously-included files matching '%s' " +
                             "found anywhere in distribution") %
                            pattern)

            elif action == 'recursive-include':
                print("recursive-include %s %s" %
                      (dir, string.join(pattern_list)))
                for pattern in pattern_list:
                    files = select_pattern(all_files, pattern, prefix=dir)
                    if not files:
                        template.warn(("no files found matching '%s' " +
                                       "under directory '%s'") %
                                      (pattern, dir))
                    else:
                        self.files.extend(files)

            elif action == 'recursive-exclude':
                print("recursive-exclude %s %s" %
                      (dir, string.join(pattern_list)))
                for pattern in pattern_list:
                    num = exclude_pattern(self.files, pattern, prefix=dir)
                    if num == 0:
                        template.warn(
                            ("no previously-included files matching '%s' " +
                             "found under directory '%s'") %
                            (pattern, dir))

            elif action == 'graft':
                print("graft %s" % dir_pattern)
                files = select_pattern(all_files, None, prefix=dir_pattern)
                if not files:
                    template.warn("no directories found matching '%s'" %
                                  dir_pattern)
                else:
                    self.files.extend(files)

            elif action == 'prune':
                print("prune %s" % dir_pattern)
                num = exclude_pattern(self.files, None, prefix=dir_pattern)
                if num == 0:
                    template.warn(
                        ("no previously-included directories found " +
                         "matching '%s'") %
                        dir_pattern)
            else:
                raise RuntimeError(
                    "this cannot happen: invalid action '%s'" % action)

        # while loop over lines of template file

    # read_template ()


    def write_manifest(self):
        """Write the file list in 'self.files' (presumably as filled in
        by 'find_defaults()' and 'read_template()') to the manifest file
        named by 'self.manifest'."""

        self.execute(write_file,
                     (self.manifest, self.files),
                     "writing manifest file")

    # write_manifest ()


    def read_manifest(self):
        """Read the manifest file (named by 'self.manifest') and use
        it to fill in 'self.files', the list of files to include
        in the source distribution."""

        manifest = open(self.manifest)
        try:
            while 1:
                line = manifest.readline()
                if line == '':          # end of file
                    break
                if line[-1] == '\n':
                    line = line[0:-1]
                self.files.append(line)
        finally:
            # always release the file handle, even if reading fails
            manifest.close()

    # read_manifest ()


    def make_release_tree(self, base_dir, files):
        """Create the directory tree under 'base_dir' needed to hold
        'files', then populate it by hard-linking each file (where
        'os.link' is available) or copying it otherwise."""

        # Create all the directories under 'base_dir' necessary to
        # put 'files' there.
        create_tree(base_dir, files,
                    verbose=self.verbose, dry_run=self.dry_run)

        # And walk over the list of files, either making a hard link (if
        # os.link exists) to each one that doesn't already exist in its
        # corresponding location under 'base_dir', or copying each file
        # that's out-of-date in 'base_dir'.  (Usually, all files will be
        # out-of-date, because by default we blow away 'base_dir' when
        # we're done making the distribution archives.)

        if hasattr(os, 'link'):         # can make hard links on this system
            link = 'hard'
            msg = "making hard links in %s..." % base_dir
        else:                           # nope, have to copy
            link = None
            msg = "copying files to %s..." % base_dir

        self.announce(msg)
        for file in files:
            dest = os.path.join(base_dir, file)
            self.copy_file(file, dest, link=link)

    # make_release_tree ()


    def make_distribution(self):
        """Build the release tree (named after the distribution's full
        name), create one archive from it per entry in 'self.formats',
        and remove the tree afterwards unless 'keep_tree' is set."""

        # Don't warn about missing meta-data here -- should be (and is!)
        # done elsewhere.
        base_dir = self.distribution.get_fullname()

        # Remove any files that match "base_dir" from the fileset -- we
        # don't want to go distributing the distribution inside itself!
        self.exclude_pattern(base_dir + "*")

        self.make_release_tree(base_dir, self.files)
        for fmt in self.formats:
            self.make_archive(base_dir, fmt, base_dir=base_dir)

        if not self.keep_tree:
            remove_tree(base_dir, self.verbose, self.dry_run)

# class sdist


# ----------------------------------------------------------------------
# Utility functions

def findall(dir = os.curdir):
    """Find all files under 'dir' and return the list of full
    filenames (relative to 'dir').  Only regular files are listed;
    directories are descended into (unless they are symbolic links)
    but are not themselves returned."""

    found = []
    stack = [dir]
    pop = stack.pop
    push = stack.append

    while stack:
        dir = pop()
        names = os.listdir(dir)

        for name in names:
            if dir != os.curdir:        # avoid the dreaded "./" syndrome
                fullname = os.path.join(dir, name)
            else:
                fullname = name

            # Callers treat the result as a list of files to copy, so
            # directory names must not end up in it.
            if os.path.isfile(fullname):
                found.append(fullname)
            elif os.path.isdir(fullname) and not os.path.islink(fullname):
                push(fullname)

    return found


def select_pattern(files, pattern, anchor=1, prefix=None):
    """Select strings (presumably filenames) from 'files' that match
    'pattern', a Unix-style wildcard (glob) pattern.  Patterns are not
    quite the same as implemented by the 'fnmatch' module: '*' and '?'
    match non-special characters, where "special" is platform-dependent:
    slash on Unix, colon, slash, and backslash on DOS/Windows, and colon
    on Mac OS.

    If 'anchor' is true (the default), then the pattern match is more
    stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
    'anchor' is false, both of these will match.

    If 'prefix' is supplied, then only filenames starting with 'prefix'
    (itself a pattern) and ending with 'pattern', with anything in
    between them, will match.  'anchor' is ignored in this case.

    Return the list of matching strings, possibly empty."""

    matches = []
    pattern_re = translate_pattern(pattern, anchor, prefix)
    print("select_pattern: applying re %s" % pattern_re.pattern)
    for name in files:
        if pattern_re.search(name):
            matches.append(name)
            print(" adding " + name)

    return matches

# select_pattern ()


def exclude_pattern(files, pattern, anchor=1, prefix=None):
    """Remove, in place, every string in the list 'files' that matches
    'pattern'; 'pattern', 'anchor' and 'prefix' are interpreted as for
    'select_pattern()'.  Return the number of entries removed, so the
    caller can tell when nothing matched."""

    pattern_re = translate_pattern(pattern, anchor, prefix)
    print("exclude_pattern: applying re %s" % pattern_re.pattern)
    num_removed = 0
    # walk backwards so deletions don't shift the entries still to visit
    for i in range(len(files)-1, -1, -1):
        if pattern_re.search(files[i]):
            print(" removing " + files[i])
            del files[i]
            num_removed = num_removed + 1

    return num_removed

# exclude_pattern ()


def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression;
    return a string containing the regex.  Differs from
    'fnmatch.translate()' in that '*' does not match "special
    characters" (which are platform-specific)."""
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    # A dot is unescaped when it is preceded by an even number of
    # backslashes (possibly none).  The lookbehind keeps the character
    # before the dot out of the match, so adjacent dots (from "??" or
    # "?*") are each replaced.
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
    return pattern_re

# glob_to_re ()


def translate_pattern(pattern, anchor=1, prefix=None):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.

    An empty or None 'pattern' contributes nothing to the regex.  If
    'prefix' (itself a glob pattern) is given, the regex is anchored at
    the start and matches 'prefix', then anything, then 'pattern';
    otherwise the regex is anchored at the start only if 'anchor' is
    true."""

    if pattern:
        pattern_re = glob_to_re(pattern)
    else:
        pattern_re = ''

    if prefix is not None:
        prefix_re = (glob_to_re(prefix))[0:-1]  # ditch trailing $
        pattern_re = "^" + os.path.join(prefix_re, ".*" + pattern_re)
    else:                               # no prefix -- respect anchor flag
        if anchor:
            pattern_re = "^" + pattern_re

    return re.compile(pattern_re)

# translate_pattern ()