util.py revision b0df6a1afa585c5f4d097aeb68e41996a9fff9d7
1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7# created 1999/03/08, Greg Ward
8
9__revision__ = "$Id$"
10
11import sys, os, string, re
12from distutils.errors import DistutilsPlatformError
13from distutils.dep_util import newer
14from distutils.spawn import spawn
15from distutils import log
16
def get_platform ():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    If 'os.name' is not "posix", or 'os.uname()' is unavailable, just
    returns 'sys.platform'.
    """
    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix
    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase and remove '/' characters
    # (to accommodate BSD/OS)
    osname = osname.lower().replace('/', '')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        # keep only the leading run of digits and dots of the release
        # string; anything after it is dropped
        m = re.match(r'[\d.]+', release)
        if m:
            release = m.group()

    return "%s-%s-%s" % (osname, release, machine)
72
73# get_platform ()
74
75
def convert_path (pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.

    When 'os.sep' is already '/', or 'pathname' is empty, 'pathname' is
    returned unchanged.  Otherwise '.' components are dropped, and if
    nothing is left 'os.curdir' is returned.  Raises ValueError on
    non-Unix-ish systems if 'pathname' either starts or ends with a slash.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # drop every '.' component in a single pass
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    return os.path.join(*paths)
100
101# convert_path ()
102
103
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError if 'os.name' is not one of 'posix',
    'nt', 'os2' or 'mac'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Strip *all* leading slashes: if one were left behind (as
            # with "//usr/lib"), os.path.join() would treat the remainder
            # as absolute and throw 'new_root' away.
            return os.path.join(new_root, pathname.lstrip('/'))

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # slice rather than index so that an empty path doesn't blow up
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        if path[:1] == os.sep:
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'mac':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Chop off volume name from start of path
            elements = pathname.split(":", 1)
            pathname = ":" + elements[1]
            return os.path.join(new_root, pathname)

    else:
        raise DistutilsPlatformError(
              "nothing known about platform '%s'" % os.name)
140
141
_environ_checked = 0
def check_environ ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Variables that are already set are left alone.  HOME is looked up in
    the password database; if the current uid has no entry there, HOME is
    simply left unset.  The work is done only once per process: later
    calls return immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        import pwd
        try:
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except KeyError:
            # no passwd entry for this uid (getpwuid() signals that with
            # KeyError); better to go without HOME than to die here
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1
163
164
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.  Every
    occurrence of '$' followed by a name (a letter or underscore, then any
    number of letters, digits or underscores) is considered a variable,
    and the variable is substituted by the value found in the 'local_vars'
    dictionary (converted with 'str()'), or in 'os.environ' if it's not in
    'local_vars'.  'os.environ' is first checked/augmented to guarantee
    that it contains certain values: see 'check_environ()'.  Raise
    ValueError for any variables not found in either 'local_vars' or
    'os.environ'.
    """
    check_environ()
    def _subst (match, local_vars=local_vars):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        try:
            return os.environ[var_name]
        except KeyError:
            # report the bare variable name; formatting the KeyError
            # object itself would wrap the name in an extra pair of quotes
            raise ValueError("invalid variable '$%s'" % var_name)

    return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
186
187# subst_vars ()
188
189
def grok_environment_error (exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.

    For exception objects without 'filename' and 'strerror' attributes,
    the message is the last element of 'exc.args', or 'str(exc)' if the
    exception carries no arguments at all.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    elif exc.args:
        # go through 'args' rather than indexing the exception itself
        error = prefix + str(exc.args[-1])
    else:
        # no arguments to pick from -- fall back on the exception's own text
        error = prefix + str(exc)

    return error
210
211
# Needed by 'split_quoted()'
# a (possibly empty) run of characters that are not a backslash, quote or
# whitespace
_wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
# a complete single-/double-quoted string; backslash escapes the next char
_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words (empty if 's' is empty or all whitespace).  Raises ValueError if
    a quoted string is not terminated.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    #
    # Invariant: 's' always starts with the word currently being built;
    # s[:pos] is the part of that word already processed (quotes and
    # backslashes removed), and scanning resumes at 'pos'.

    s = s.strip()
    words = []
    pos = 0

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                      "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                      "bad string (mismatched %s quotes?)" % s[end])

            # splice the quote characters out of 's'; the text that was
            # between them is now part of the current word, so resume
            # scanning just past it (two characters were removed)
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words
275
276# split_quoted ()
277
278
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg.  by
    writing to the filesystem).  Such actions are special because they
    are disabled by the 'dry_run' flag.  This method takes care of all
    that bureaucracy for you; all you have to do is supply the
    function to call and an argument tuple for it (to embody the
    "external action" being performed), and an optional message to
    log.

    If 'msg' is None, a message is built from the function's name and
    the repr of 'args'.  The message is logged at "info" level whether
    or not 'dry_run' is set; 'func' is only called when 'dry_run' is
    false.  'verbose' is accepted but not used here.
    """
    if msg is None:
        msg = "%s%s" % (func.__name__, repr(args))
        if msg[-2:] == ',)':        # correct for singleton tuple
            msg = msg[0:-2] + ')'

    log.info(msg)
    if not dry_run:
        func(*args)
296
297
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  The comparison ignores
    case.  Raises ValueError if 'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))
312
313
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.  Raises
    ValueError if 'prefix' is given and some filename doesn't start with it.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.
    """

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # mkstemp() has already created the file and handed us an
            # open descriptor.  Release both right away so that a dry
            # run neither leaks the descriptor nor leaves a stray file.
            os.close(script_fd)
            os.remove(script_name)
        else:
            script = os.fdopen(script_fd, "w")
            try:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face
                # of chdir'ing before running it).  But this requires
                # abspath'ing 'prefix' as well, and that breaks the hack
                # in build_lib's 'byte_compile()' method that carefully
                # tacks on a trailing slash (os.sep really) to make sure
                # the prefix here is "just right".  This whole prefix
                # business is rather delicate -- the problem is that it's
                # really a directory, but I'm treating it as a dumb
                # string, so trailing slashes and so forth matter.

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%s, force=%s,
             prefix=%s, base_dir=%s,
             verbose=%s, dry_run=0,
             direct=1)
""" % (repr(optimize), repr(force), repr(prefix), repr(base_dir),
       repr(verbose)))
            finally:
                # close the script even if one of the writes failed
                script.close()

        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        spawn(cmd, dry_run=dry_run)
        execute(os.remove, (script_name,), "removing %s" % script_name,
                dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for src in py_files:
            if src[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'src' by default)
            if __debug__:
                cfile = src + "c"
            else:
                cfile = src + "o"
            dfile = src
            if prefix:
                if src[:len(prefix)] != prefix:
                    raise ValueError(
                          "invalid prefix: filename %s doesn't start with %s"
                          % (repr(src), repr(prefix)))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(src, cfile):
                log.info("byte-compiling %s to %s", src, cfile_base)
                if not dry_run:
                    compile(src, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          src, cfile_base)
444
445# byte_compile ()
446
def rfc822_escape (header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    Leading and trailing whitespace is stripped from every line first.
    """
    lines = [line.strip() for line in header.split('\n')]
    return ('\n' + 8*' ').join(lines)
455