1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7__revision__ = "$Id$"
8
9import sys, os, string, re
10from distutils.errors import DistutilsPlatformError
11from distutils.dep_util import newer
12from distutils.spawn import spawn
13from distutils import log
14from distutils.errors import DistutilsByteCompileError
15
def get_platform ():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64, aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    If the '_PYTHON_HOST_PLATFORM' environment variable is set (cross
    builds), its value is returned verbatim on every platform except
    Windows.

    For other non-POSIX platforms, currently just returns 'sys.platform'.
    """
    if os.name == 'nt':
        # sniff sys.version for architecture: it contains e.g.
        # "[MSC v.1500 64 bit (AMD64)]" on 64-bit builds.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    # Set for cross builds explicitly
    if "_PYTHON_HOST_PLATFORM" in os.environ:
        return os.environ["_PYTHON_HOST_PLATFORM"]

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
            # We can't use "platform.architecture()[0]" because of a
            # bootstrap problem.  We use a dict so that we get an error
            # (KeyError) if something suspicious happens.
            bitness = {2147483647:"32bit", 9223372036854775807:"64bit"}
            machine += ".%s" % bitness[sys.maxsize]
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        # keep only the leading "digits and dots" part of the release
        m = re.match(r'[\d.]+', release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        # imported here (not at module level) and delegated entirely to
        # the _osx_support helper
        import _osx_support, distutils.sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
                                        distutils.sysconfig.get_config_vars(),
                                        osname, release, machine)

    return "%s-%s-%s" % (osname, release, machine)
106
107# get_platform ()
108
109
def convert_path (pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.

    'pathname' is returned unchanged when the native separator already is
    '/' or when 'pathname' is empty.  '.' components are dropped; if
    nothing is left, 'os.curdir' is returned.  Raises ValueError on
    non-Unix-ish systems if 'pathname' either starts or ends with a slash.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # drop every '.' component in a single pass
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    # On Windows, if paths is ['C:','folder','subfolder'] then
    # os.path.join(*paths) will return 'C:folder\subfolder' which
    # is thus relative to the CWD on that drive. So we work around
    # this by adding a \ to path[0]
    # (the workaround is only applied for GCC-built win32 interpreters)
    if (paths[0].endswith(':') and
        sys.platform == "win32" and sys.version.find("GCC") >= 0):
        paths[0] += '\\'
    return os.path.join(*paths)
141
142# convert_path ()
143
144
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    On 'nt' and 'os2' the drive part of 'pathname' is discarded and a
    single leading separator is stripped from the rest.  Raises
    DistutilsPlatformError when 'os.name' is none of 'posix', 'nt' or
    'os2'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            return os.path.join(new_root, pathname[1:])

    elif os.name in ('nt', 'os2'):
        (drive, path) = os.path.splitdrive(pathname)
        # 'nt' strips a literal backslash, 'os2' whatever os.sep is
        if os.name == 'nt':
            leading = '\\'
        else:
            leading = os.sep
        # slice instead of indexing: 'path' is empty for '' or a bare
        # drive such as 'C:', and path[0] would raise IndexError
        if path[:1] == leading:
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError(
              "nothing known about platform '%s'" % os.name)
172
173
# Set to 1 once 'check_environ()' has run, so the work is done only once
# per process.
_environ_checked = 0
def check_environ ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only); left unset if the
             current user id has no entry in the password database
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Variables that are already set are never overwritten.  Only the first
    call does any work; later calls return immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        try:
            import pwd
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # pwd.getpwuid() raises KeyError when the current uid is not
            # in the password database (e.g. arbitrary uids in containers);
            # that must not break every distutils command, so HOME is
            # simply left unset.
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1
195
196
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.  Every
    occurrence of '$' followed by a name is considered a variable, and
    the variable is substituted by the value found in the 'local_vars'
    dictionary (converted with 'str()'), or in 'os.environ' if it's not
    in 'local_vars'.  'os.environ' is first checked/augmented to guarantee
    that it contains certain values: see 'check_environ()'.  Raise
    ValueError for any variables not found in either 'local_vars' or
    'os.environ'.
    """
    check_environ()
    def _subst (match, local_vars=local_vars):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        try:
            return os.environ[var_name]
        except KeyError:
            # Report the bare variable name: formatting the KeyError
            # object itself would add a stray pair of quotes
            # ("invalid variable '$'FOO''").
            raise ValueError("invalid variable '$%s'" % var_name)

    return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
218
219# subst_vars ()
220
221
def grok_environment_error (exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.

    For objects without 'filename'/'strerror' attributes, the last element
    of 'exc.args' is used; if there is none, 'str(exc)' is used instead.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    else:
        # Go through 'args' rather than indexing the exception itself
        # (exception indexing is deprecated), and don't blow up on an
        # exception that was created without arguments.
        try:
            detail = exc.args[-1]
        except (AttributeError, IndexError):
            detail = exc
        error = prefix + str(detail)

    return error
242
243
244# Needed by 'split_quoted()'
# Needed by 'split_quoted()'; compiled lazily by '_init_regex()'
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the three regexes used by 'split_quoted()' and store them
    in the module-level globals.
    """
    global _wordchars_re, _squote_re, _dquote_re
    # a (possibly empty) run of characters that are neither backslash,
    # quote nor whitespace
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # a complete single-/double-quoted string, backslash escapes allowed
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.  Raises ValueError if a quote is opened but never closed.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    if _wordchars_re is None: _init_regex()

    s = s.strip()
    words = []
    # 's' always holds the current (partially processed) word followed by
    # the unprocessed rest; 'pos' is where scanning of 's' resumes.
    pos = 0

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            # plain characters up to the end: that's the last word
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                      "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                      "bad string (mismatched %s quotes?)" % s[end])

            # drop the two quote characters but keep what they enclose;
            # scanning resumes right after the (now unquoted) text
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words
311
312# split_quoted ()
313
314
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Run an action that has an effect on the outside world (such as
    writing to the filesystem), honouring the 'dry_run' flag.

    'func' is the callable to invoke and 'args' the tuple of positional
    arguments to pass to it.  'msg' is logged at info level before the
    call; when it is None a message of the form "name(arg, ...)" is built
    from 'func.__name__' and 'args'.  If 'dry_run' is true the message is
    still logged but 'func' is not called.  'verbose' is accepted but not
    used here.
    """
    if msg is None:
        description = "%s%r" % (func.__name__, args)
        # repr of a one-element tuple ends in ',)': drop the comma so
        # the message reads like an ordinary call
        if description.endswith(',)'):
            description = description[:-2] + ')'
        msg = description

    log.info(msg)
    if dry_run:
        return
    func(*args)
332
333
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  The comparison is
    case-insensitive.  Raises ValueError if 'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))
348
349
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if 'prefix' is given but some ".py" file in 'py_files'
    doesn't start with it.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # mkstemp() has already created the file and handed us an
            # open descriptor; a dry run must not leave either behind.
            os.close(script_fd)
            os.remove(script_name)

        try:
            if not dry_run:
                with os.fdopen(script_fd, "w") as script:
                    script.write("""\
from distutils.util import byte_compile
files = [
""")

                    # XXX would be nice to write absolute filenames, just for
                    # safety's sake (script should be more robust in the face
                    # of chdir'ing before running it).  But this requires
                    # abspath'ing 'prefix' as well, and that breaks the hack
                    # in build_lib's 'byte_compile()' method that carefully
                    # tacks on a trailing slash (os.sep really) to make sure
                    # the prefix here is "just right".  This whole prefix
                    # business is rather delicate -- the problem is that it's
                    # really a directory, but I'm treating it as a dumb
                    # string, so trailing slashes and so forth matter.

                    #py_files = map(os.path.abspath, py_files)
                    #if prefix:
                    #    prefix = os.path.abspath(prefix)

                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

            cmd = [sys.executable, script_name]
            if optimize == 1:
                cmd.insert(1, "-O")
            elif optimize == 2:
                cmd.insert(1, "-OO")
            spawn(cmd, dry_run=dry_run)
        finally:
            # remove the script even if writing or running it failed
            # (in a dry run this only logs; the file is already gone)
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for src in py_files:
            if src[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'src' by default)
            cfile = src + ("c" if __debug__ else "o")
            dfile = src
            if prefix:
                if src[:len(prefix)] != prefix:
                    raise ValueError(
                          "invalid prefix: filename %r doesn't start with %r"
                          % (src, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            # ('direct' is necessarily true in this branch)
            if force or newer(src, cfile):
                log.info("byte-compiling %s to %s", src, cfile_base)
                if not dry_run:
                    compile(src, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          src, cfile_base)
490
491# byte_compile ()
492
def rfc822_escape (header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    lines = header.split('\n')
    # every line after the first becomes a continuation line, indented
    # by 8 spaces
    return ('\n' + 8*' ').join(lines)
500