# util.py revision b0df6a1afa585c5f4d097aeb68e41996a9fff9d7
"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""

# created 1999/03/08, Greg Ward

__revision__ = "$Id$"

import os
import re
import string
import sys

from distutils.errors import DistutilsPlatformError
from distutils.dep_util import newer
from distutils.spawn import spawn
from distutils import log


def get_platform():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    For non-POSIX platforms, currently just returns 'sys.platform'.
    """
    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase and remove '/' characters
    # (to accommodate BSD/OS)
    osname = osname.lower().replace('/', '')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":          # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        # Keep only the leading run of digits and dots of the release.
        rel_re = re.compile(r'[\d.]+')
        m = rel_re.match(release)
        if m:
            release = m.group()

    return "%s-%s-%s" % (osname, release, machine)

# get_platform ()


def convert_path(pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.  An empty 'pathname' is returned unchanged; a
    'pathname' made up only of '.' components yields 'os.curdir'.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop every '.' component in a single pass.
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    return os.path.join(*paths)

# convert_path ()


def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError if 'os.name' is not one of 'posix',
    'nt', 'os2' or 'mac'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            return os.path.join(new_root, pathname[1:])

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice rather than index: 'path' is empty for inputs such as
        # "C:" or "", and path[0] would raise IndexError.
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        # Same empty-path precaution as the 'nt' branch.
        if path[:1] == os.sep:
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'mac':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Chop off volume name from start of path
            elements = pathname.split(":", 1)
            pathname = ":" + elements[1]
            return os.path.join(new_root, pathname)

    else:
        raise DistutilsPlatformError(
            "nothing known about platform '%s'" % os.name)


_environ_checked = 0
def check_environ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    The work is done at most once per process; later calls return
    immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        import pwd
        # Field 5 of the password database entry is the home directory.
        os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1


def subst_vars(s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.  Every
    occurrence of '$' followed by a name is considered a variable, and
    variable is substituted by the value found in the 'local_vars'
    dictionary, or in 'os.environ' if it's not in 'local_vars'.
    'os.environ' is first checked/augmented to guarantee that it contains
    certain values: see 'check_environ()'.  Raise ValueError for any
    variables not found in either 'local_vars' or 'os.environ'.
    """
    check_environ()

    def _subst(match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        try:
            return os.environ[var_name]
        except KeyError:
            # Report the bare variable name; formatting the KeyError
            # object itself would insert the repr of the key and produce
            # doubled quotes in the message.
            raise ValueError("invalid variable '$%s'" % var_name)

    return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)

# subst_vars ()


def grok_environment_error(exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    else:
        # Last constructor argument of the exception; 'args' is used
        # because exception objects are not indexable on Python 3.
        error = prefix + str(exc.args[-1])

    return error


# Needed by 'split_quoted()'
_wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.  Raises ValueError if a quoted string is not terminated.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...

    s = s.strip()
    words = []
    pos = 0

    while s:
        # 's' always starts at the current word; 'pos' is how far into it
        # we have already scanned.
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % s[end])

            # Remove the two quote characters but keep what was between
            # them as part of the current word.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words

# split_quoted ()


def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg.  by
    writing to the filesystem).  Such actions are special because they
    are disabled by the 'dry_run' flag.  This method takes care of all
    that bureaucracy for you; all you have to do is supply the
    function to call and an argument tuple for it (to embody the
    "external action" being performed), and an optional message to
    print.

    'verbose' is accepted but not consulted here; the message is always
    passed to 'log.info()'.
    """
    if msg is None:
        msg = "%s%s" % (func.__name__, repr(args))
        if msg[-2:] == ',)':        # correct for singleton tuple
            msg = msg[0:-2] + ')'

    log.info(msg)
    if not dry_run:
        func(*args)


def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  The comparison is
    case-insensitive.  Raises ValueError if 'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %s" % repr(val))


def byte_compile(py_files,
                 optimize=0, force=0,
                 prefix=None, base_dir=None,
                 verbose=1, dry_run=0,
                 direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.  ValueError is
    raised if a filename does not start with a supplied 'prefix'.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out whether to use direct compilation or not
    (see the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.
    """

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # mkstemp() has already created and opened the file.  A dry
            # run must leave nothing behind, so release the descriptor
            # and delete the file again.
            os.close(script_fd)
            os.remove(script_name)
        else:
            script = os.fdopen(script_fd, "w")
            try:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face of
                # chdir'ing before running it).  But this requires abspath'ing
                # 'prefix' as well, and that breaks the hack in build_lib's
                # 'byte_compile()' method that carefully tacks on a trailing
                # slash (os.sep really) to make sure the prefix here is "just
                # right".  This whole prefix business is rather delicate -- the
                # problem is that it's really a directory, but I'm treating it
                # as a dumb string, so trailing slashes and so forth matter.

                #py_files = map(os.path.abspath, py_files)
                #if prefix:
                #    prefix = os.path.abspath(prefix)

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%s, force=%s,
             prefix=%s, base_dir=%s,
             verbose=%s, dry_run=0,
             direct=1)
""" % (repr(optimize), repr(force), repr(prefix), repr(base_dir),
       repr(verbose)))
            finally:
                # Close the script even if one of the writes failed.
                script.close()

        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            # Remove the temporary script whether or not the child
            # process succeeded.
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        # ".pyc" when this interpreter runs in debug mode, ".pyo" under
        # -O/-OO.
        if __debug__:
            suffix = "c"
        else:
            suffix = "o"

        for py_file in py_files:
            if py_file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'py_file' by
            #           default)
            cfile = py_file + suffix
            dfile = py_file
            if prefix:
                if py_file[:len(prefix)] != prefix:
                    raise ValueError(
                        "invalid prefix: filename %s doesn't start with %s"
                        % (repr(py_file), repr(prefix)))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(py_file, cfile):
                log.info("byte-compiling %s to %s", py_file, cfile_base)
                if not dry_run:
                    compile(py_file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          py_file, cfile_base)

# byte_compile ()

def rfc822_escape(header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    Leading and trailing whitespace is stripped from every line first.
    """
    lines = [line.strip() for line in header.split('\n')]
    return ('\n' + 8*' ').join(lines)