1"""Utilities to support packages."""
2
3# NOTE: This module must remain compatible with Python 2.3, as it is shared
4# by setuptools for distribution with Python 2.3 and up.
5
6import os
7import sys
8import imp
9import os.path
10from types import ModuleType
11
12__all__ = [
13    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
14    'walk_packages', 'iter_modules', 'get_data',
15    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
16]
17
def read_code(stream):
    """Load a code object from an open compiled-bytecode stream.

    The PEP 302 emulation in this module uses this helper to deal with
    compiled files.

    'stream' is a file-like object positioned at the start of the
    compiled file.  The first four bytes are compared with
    imp.get_magic(); on a mismatch None is returned.  Otherwise the next
    four bytes (the timestamp) are discarded and the remainder is
    unmarshalled and returned.
    """
    import marshal

    if stream.read(4) == imp.get_magic():
        stream.read(4)  # timestamp field; its value is not needed here
        return marshal.load(stream)
    return None
29
30
def simplegeneric(func):
    """Turn 'func' into a trivial single-dispatch generic function.

    The returned wrapper dispatches on the class of its first positional
    argument: the classes in that class's method resolution order are
    tried in turn and the first one with a registered implementation
    handles the call.  When nothing matches, 'func' itself is called.

    Implementations are added with wrapper.register(typ, func); when
    'func' is omitted, register(typ) returns a one-argument callable
    that registers whatever function it is given.
    """
    registry = {}

    def wrapper(*args, **kw):
        subject = args[0]
        try:
            klass = subject.__class__
        except AttributeError:
            klass = type(subject)
        try:
            lineage = klass.__mro__
        except AttributeError:
            # No __mro__ on this class: derive a throwaway subclass that
            # also inherits from object and borrow its MRO, dropping the
            # throwaway class itself from the front.
            try:
                class klass(klass, object):
                    pass
                lineage = klass.__mro__[1:]
            except TypeError:
                # must be an ExtensionClass or some such  :(
                lineage = (object,)
        for candidate in lineage:
            if candidate in registry:
                return registry[candidate](*args, **kw)
        # Nothing registered for any class in the lineage.
        return func(*args, **kw)

    try:
        wrapper.__name__ = func.__name__
    except (TypeError, AttributeError):
        pass    # Python 2.3 doesn't allow functions to be renamed

    def register(typ, func=None):
        if func is not None:
            registry[typ] = func
            return func
        # Called with the type only: hand back a registering callable.
        return lambda f: register(typ, f)

    # The wrapper shares (not copies) the original function's attribute
    # dictionary, so 'register' must be attached after this assignment.
    wrapper.__dict__ = func.__dict__
    wrapper.__doc__ = func.__doc__
    wrapper.register = register
    return wrapper
69
70
def walk_packages(path=None, prefix='', onerror=None):
    """Yields (module_loader, name, ispkg) for all modules recursively
    on path, or, if path is None, all accessible modules.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Every *package* found (but not every module) is imported, because
    its __path__ attribute is needed in order to locate its submodules.

    'onerror' is a function called with one argument (the name of the
    package being imported) whenever importing a package raises an
    exception.  Without an onerror function, ImportErrors are caught
    and ignored, while any other exception propagates and terminates
    the search.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """

    # Package path entries already descended into by this call.
    visited = {}

    for importer, name, ispkg in iter_modules(path, prefix):
        yield importer, name, ispkg

        if not ispkg:
            continue

        try:
            __import__(name)
        except ImportError:
            if onerror is not None:
                onerror(name)
            continue
        except Exception:
            if onerror is None:
                raise
            onerror(name)
            continue

        # don't traverse path items we've seen before
        fresh = []
        for entry in getattr(sys.modules[name], '__path__', None) or []:
            if entry not in visited:
                visited[entry] = True
                fresh.append(entry)

        for item in walk_packages(fresh, name+'.', onerror):
            yield item
127
128
def iter_modules(path=None, prefix=''):
    """Yields (module_loader, name, ispkg) for all submodules on path,
    or, if path is None, all top-level modules on sys.path.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    A given name is yielded at most once; the first importer that
    reports it wins.
    """

    if path is not None:
        sources = map(get_importer, path)
    else:
        sources = iter_importers()

    reported = {}
    for source in sources:
        for name, ispkg in iter_importer_modules(source, prefix):
            if name in reported:
                continue
            reported[name] = 1
            yield source, name, ispkg
151
152
def iter_importer_modules(importer, prefix=''):
    """Default way of listing the modules an importer can provide.

    Delegates to importer.iter_modules(prefix) when the importer has
    such a method; otherwise reports no modules by returning an empty
    list.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []

# Applied by hand rather than as "@simplegeneric": decorator syntax is
# not available on the oldest Python this module supports.
iter_importer_modules = simplegeneric(iter_importer_modules)
160
161
class ImpImporter:
    """PEP 302 Importer that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 importer that searches that
    directory.  ImpImporter(None) produces a PEP 302 importer that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        # Directory to search, or None for "whatever imp searches by default".
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for 'fullname', or None if it is not found.

        Only the last dotted component of 'fullname' is looked up, via
        imp.find_module().  The 'path' argument is ignored since it is
        only used via meta_path.
        """
        leaf = fullname.split(".")[-1]
        if self.path is None:
            if leaf != fullname:
                # A dotted name cannot be resolved without a directory.
                return None
            search = None
        else:
            search = [os.path.realpath(self.path)]
        try:
            found = imp.find_module(leaf, search)
        except ImportError:
            return None
        handle, location, description = found
        return ImpLoader(fullname, handle, location, description)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for the modules in self.path.

        Nothing is yielded when self.path is None or is not a directory.
        A subdirectory counts as a package when its name contains no dot
        and it holds an __init__ module.  Each name is yielded once.
        """
        if self.path is None or not os.path.isdir(self.path):
            return

        emitted = {}
        import inspect
        try:
            entries = os.listdir(self.path)
        except OSError:
            # ignore unreadable directories like import does
            entries = []
        entries.sort()  # handle packages before same-named modules

        for entry in entries:
            modname = inspect.getmodulename(entry)
            if modname == '__init__' or modname in emitted:
                continue

            candidate = os.path.join(self.path, entry)
            ispkg = False

            if not modname and os.path.isdir(candidate) and '.' not in entry:
                try:
                    children = os.listdir(candidate)
                except OSError:
                    # ignore unreadable directories like import does
                    children = []
                for child in children:
                    if inspect.getmodulename(child) == '__init__':
                        ispkg = True
                        break
                if not ispkg:
                    continue    # not a package
                modname = entry

            if modname and '.' not in modname:
                emitted[modname] = 1
                yield prefix + modname, ispkg
230
231
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm

    An instance is built from the values returned by imp.find_module():
    the open file (or None), the filename, and the description tuple,
    whose third item is the module type constant from the imp module.
    """
    # Lazily filled caches for get_code() / get_source().
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Load and return the module via imp.load_module().

        The file handle is reopened first if it had been closed, and is
        always closed again afterwards.
        """
        self._reopen()
        try:
            mod = imp.load_module(fullname, self.file, self.filename, self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return mod

    def get_data(self, pathname):
        """Return the contents of the file 'pathname', read in binary mode."""
        f = open(pathname, "rb")
        try:
            # Close explicitly instead of relying on garbage collection.
            return f.read()
        finally:
            f.close()

    def _reopen(self):
        """Reopen self.file from self.filename if it has been closed.

        Source files are opened in 'rU' mode; compiled files and C
        extensions in 'rb' mode.  Other module types are left alone.
        """
        if self.file and self.file.closed:
            mod_type = self.etc[2]
            if mod_type==imp.PY_SOURCE:
                self.file = open(self.filename, 'rU')
            elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
                self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Validate 'fullname' against the module this loader was made for.

        None means "this loader's module".  Any other name that differs
        from self.fullname raises ImportError.
        """
        if fullname is None:
            fullname = self.fullname
        elif fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return true if the module found is a package directory."""
        fullname = self._fix_name(fullname)
        return self.etc[2]==imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing and caching it once.

        Source modules are compiled from get_source(), compiled modules
        are read with read_code(), and packages delegate to the loader
        of their __init__ module.  For any other module type the cached
        value stays None.
        """
        fullname = self._fix_name(fullname)
        if self.code is None:
            mod_type = self.etc[2]
            if mod_type==imp.PY_SOURCE:
                source = self.get_source(fullname)
                self.code = compile(source, self.filename, 'exec')
            elif mod_type==imp.PY_COMPILED:
                self._reopen()
                try:
                    self.code = read_code(self.file)
                finally:
                    self.file.close()
            elif mod_type==imp.PKG_DIRECTORY:
                self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading and caching it once.

        For a compiled module the source is looked for next to it, at
        the compiled filename minus its last character; if no such file
        exists the result is None.  Packages delegate to the loader of
        their __init__ module.
        """
        fullname = self._fix_name(fullname)
        if self.source is None:
            mod_type = self.etc[2]
            if mod_type==imp.PY_SOURCE:
                self._reopen()
                try:
                    self.source = self.file.read()
                finally:
                    self.file.close()
            elif mod_type==imp.PY_COMPILED:
                source_path = self.filename[:-1]
                if os.path.exists(source_path):
                    f = open(source_path, 'rU')
                    try:
                        # Make sure the handle is released even if the
                        # read fails.
                        self.source = f.read()
                    finally:
                        f.close()
            elif mod_type==imp.PKG_DIRECTORY:
                self.source = self._get_delegate().get_source()
        return self.source


    def _get_delegate(self):
        """Return the loader for the __init__ module inside self.filename."""
        return ImpImporter(self.filename).find_module('__init__')

    def get_filename(self, fullname=None):
        """Return the file the module is loaded from, or None.

        Packages report the filename of their __init__ module; source,
        compiled and C extension modules report self.filename; every
        other module type yields None.
        """
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type==imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
325
326
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules directly under a zipimporter.

        The archive's entries are taken from zipimport's directory cache
        and restricted to those below importer.prefix.  A directory that
        contains an __init__.py* entry is reported as a package; other
        entries whose names inspect.getmodulename() recognises are
        reported as plain modules.  Each name is yielded once, and
        'prefix' is prepended to packages and modules alike.
        """
        # Copy the keys into a real list so that sort() is available
        # regardless of what keys() returns.
        dirlist = list(zipimport._zip_directory_cache[importer.archive].keys())
        dirlist.sort()
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = {}
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            # Path components relative to the importer's own prefix.
            fn = fn[plen:].split(os.sep)

            if len(fn)==2 and fn[1].startswith('__init__.py'):
                if fn[0] not in yielded:
                    yielded[fn[0]] = 1
                    # Packages must carry the caller's prefix just like
                    # plain modules do below.
                    yield prefix + fn[0], True

            if len(fn)!=1:
                continue

            modname = inspect.getmodulename(fn[0])
            if modname=='__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded[modname] = 1
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass
364
365
def get_importer(path_item):
    """Retrieve a PEP 302 importer for the given path item

    sys.path_importer_cache is consulted first.  On a miss, each hook in
    sys.path_hooks is tried in order and the first one that does not
    raise ImportError supplies the importer, which is then stored in the
    cache.

    If no hook accepts the item, None is stored in the cache and a
    wrapper around the basic import machinery is returned instead; that
    wrapper itself is never put in the cache.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.
    """
    cache = sys.path_importer_cache
    if path_item in cache:
        importer = cache[path_item]
    else:
        importer = None
        for hook in sys.path_hooks:
            try:
                importer = hook(path_item)
            except ImportError:
                continue
            break
        cache.setdefault(path_item, importer)

    if importer is None:
        # Fall back to the classic import machinery.
        try:
            importer = ImpImporter(path_item)
        except ImportError:
            importer = None
    return importer
398
399
def iter_importers(fullname=""):
    """Yield PEP 302 importers for the given module name

    For a dotted fullname, the importers are those for the __path__ of
    the containing package, which is imported as a side effect if it is
    not already loaded.  Otherwise the importers on sys.meta_path are
    yielded first, then one per sys.path entry, and finally a wrapper
    around Python's "classic" import machinery.

    A fullname starting with '.' raises ImportError, as relative module
    names are not supported.

    Non PEP 302 mechanisms (e.g. the Windows registry) used by the
    standard import machinery to find files in alternative locations
    are partially supported, but are searched AFTER sys.path. Normally,
    these locations are searched BEFORE sys.path, preventing sys.path
    entries from shadowing them.

    For this to cause a visible difference in behaviour, there must
    be a module or package name that is accessible via both sys.path
    and one of the non PEP 302 file system mechanisms. In this case,
    the emulation will find the former version, while the builtin
    import mechanism will find the latter.

    Items of the following types can be affected by this discrepancy:
        imp.C_EXTENSION, imp.PY_SOURCE, imp.PY_COMPILED, imp.PKG_DIRECTORY
    """
    if fullname.startswith('.'):
        raise ImportError("Relative module names not supported")

    toplevel = '.' not in fullname
    if toplevel:
        for hook in sys.meta_path:
            yield hook
        search = sys.path
    else:
        # Everything before the last dot names the containing package.
        parent = fullname[:fullname.rfind('.')]
        if parent not in sys.modules:
            __import__(parent)
        search = getattr(sys.modules[parent], '__path__', None) or []

    for entry in search:
        yield get_importer(entry)

    if toplevel:
        yield ImpImporter()
440
def get_loader(module_or_name):
    """Get a PEP 302 "loader" object for module_or_name

    'module_or_name' may be a module object or a module name.  A name
    found in sys.modules is first replaced by the module it maps to.
    For a module object, its __loader__ attribute is returned when that
    is set; otherwise the lookup falls back to find_loader() on the
    module's name.

    Returns None if the module cannot be found or imported.
    If the named module is not already imported, its containing package
    (if any) is imported, in order to establish the package __path__.

    This function uses iter_importers(), and is thus subject to the same
    limitations regarding platform-specific special import locations such
    as the Windows registry.
    """
    target = sys.modules.get(module_or_name, module_or_name)
    if not isinstance(target, ModuleType):
        return find_loader(target)

    loader = getattr(target, '__loader__', None)
    if loader is not None:
        return loader
    return find_loader(target.__name__)
465
def find_loader(fullname):
    """Find a PEP 302 "loader" object for fullname

    Each importer produced by iter_importers(fullname) is asked in turn
    for the module; the first loader obtained is returned.  Returns None
    if the module cannot be found or imported.  Because it relies on
    iter_importers(), this function is subject to the same limitations
    regarding platform-specific special import locations such as the
    Windows registry.
    """
    loader = None
    for importer in iter_importers(fullname):
        loader = importer.find_module(fullname)
        if loader is not None:
            break
    return loader
480
481
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    This will add to the package's __path__ all subdirectories of
    directories on sys.path named after the package.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.  A subdirectory is only added if
    it contains an __init__.py file and is not already on the path.

    It also looks for *.pkg files where * matches the name argument.
    This feature is similar to *.pth files (see site.py), except that
    it doesn't special-case lines starting with 'import'.  Blank lines
    and lines starting with '#' are skipped.  A *.pkg file is trusted
    at face value: every other line is appended to the path as-is,
    without checking whether it exists on the filesystem and without
    checking for duplicates.

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not (unicode or 8-bit) strings referring to existing
    directories are ignored.  Unicode items of sys.path that cause
    errors when used as filenames may cause this function to raise an
    exception (in line with os.path.isdir() behavior).

    A *.pkg file that cannot be opened is reported on sys.stderr and
    otherwise ignored.
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    # 'basestring' does not exist on every Python version; fall back to
    # 'str' where it is missing.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    pname = os.path.join(*name.split('.')) # Reconstitute as relative path
    # Just in case os.extsep != '.'
    sname = os.extsep.join(name.split('.'))
    sname_pkg = sname + os.extsep + "pkg"
    init_py = "__init__" + os.extsep + "py"

    path = path[:] # Start with a copy of the existing path

    for entry in sys.path:
        if not isinstance(entry, string_types) or not os.path.isdir(entry):
            continue
        subdir = os.path.join(entry, pname)
        # XXX This may still add duplicate entries to path on
        # case-insensitive filesystems
        initfile = os.path.join(subdir, init_py)
        if subdir not in path and os.path.isfile(initfile):
            path.append(subdir)
        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except IOError:
                # Fetch the exception this way so the syntax is valid on
                # every Python version.
                msg = sys.exc_info()[1]
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, msg))
            else:
                try:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        path.append(line) # Don't check for existence!
                finally:
                    # Release the handle even if reading fails part-way.
                    f.close()

    return path
555
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/').

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    None is returned when no loader can be found for the package, when
    the loader has no get_data() method, or when the loaded module has
    no __file__ attribute.
    """

    loader = get_loader(package)
    if loader is None or not hasattr(loader, 'get_data'):
        return None

    # Reuse the already-imported module when there is one.
    mod = sys.modules.get(package) or loader.load_module(package)
    if mod is None or not hasattr(mod, '__file__'):
        return None

    # loader.get_data() expects an os.path style filename rooted at the
    # directory holding the package's __file__, so rebuild the name from
    # its '/'-separated pieces.
    pieces = resource.split('/')
    base = os.path.dirname(mod.__file__)
    return loader.get_data(os.path.join(base, *pieces))
592