1"""File wrangling."""
2
3from coverage.backward import to_string
4from coverage.misc import CoverageException
5import fnmatch, os, re, sys
6
class FileLocator(object):
    """Understand how filenames work.

    A `FileLocator` remembers the current directory at construction time and
    uses it to produce relative names.  It also canonicalizes filenames
    (caching the results) and can read source text out of zip or egg files.

    """

    def __init__(self):
        # The absolute path to our current directory, with a trailing
        # separator so prefix tests only match whole directory names.
        self.relative_dir = self.abs_file(os.curdir) + os.sep

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.
        self.canonical_filename_cache = {}

    def abs_file(self, filename):
        """Return the absolute normalized form of `filename`."""
        return os.path.normcase(os.path.abspath(os.path.realpath(filename)))

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  Filenames outside that directory are
        returned unchanged.

        """
        # `relative_dir` was built with abs_file(), so it is case-normalized.
        # Compare against the normalized form of `filename`, but slice the
        # original so the caller's spelling of the remainder is preserved.
        if os.path.normcase(filename).startswith(self.relative_dir):
            # Strip only the leading directory.  A str.replace() here would
            # also delete any later occurrence of the same text in the path.
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.
        Results are cached per input `filename`.

        """
        if filename not in self.canonical_filename_cache:
            f = filename
            if os.path.isabs(f) and not os.path.exists(f):
                # An absolute path that doesn't exist on disk might still
                # name a file inside a zip or egg.  If it doesn't, fall back
                # to searching for the bare file name.
                if self.get_zip_data(f) is None:
                    f = os.path.basename(f)
            if not os.path.isabs(f):
                # Look for the file in the current directory, then along
                # sys.path, and take the first place it exists.
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    g = os.path.join(path, f)
                    if os.path.exists(g):
                        f = g
                        break
            cf = self.abs_file(f)
            self.canonical_filename_cache[filename] = cf
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the string data read from the zip file, or None if no zip file
        could be found or `filename` isn't in it.  The data returned will be
        an empty string if the file is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # Split at the first marker only: everything after it is the
                # path inside the archive, even if the marker text appears
                # again further along.
                archive_stem, _, inner_path = filename.partition(marker)
                try:
                    # marker[:-1] drops the trailing separator, leaving the
                    # archive's own file name.
                    zi = zipimport.zipimporter(archive_stem + marker[:-1])
                except zipimport.ZipImportError:
                    continue
                try:
                    data = zi.get_data(inner_path)
                except IOError:
                    continue
                return to_string(data)
        return None
79
80
class TreeMatcher(object):
    """A matcher for files in a tree.

    Holds a list of directory paths; a file path matches if it is one of
    those paths or lies beneath one of them.

    """
    def __init__(self, directories):
        # Keep our own copy so later add() calls don't touch the caller's.
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?"""
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            # Whatever follows the root decides: nothing at all means it is
            # the very same path, a leading separator means it is inside.
            remainder = fpath[len(root):]
            if remainder == "" or remainder[0] == os.sep:
                return True
        return False
104
105
class FnmatchMatcher(object):
    """A matcher for files by filename pattern.

    Holds a list of `fnmatch`-style patterns; a path matches if any one of
    the patterns matches it.

    """
    def __init__(self, pats):
        # Copy, so the caller's sequence isn't shared with us.
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        return any(fnmatch.fnmatch(fpath, pat) for pat in self.pats)
120
121
def sep(s):
    """Find the path separator used in this string, or os.sep if none.

    The first slash or backslash found in `s` is taken to be its separator.

    """
    found = re.search(r"[\\/]", s)
    if found is None:
        return os.sep
    return found.group(0)
130
131
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.

    """
    def __init__(self, locator=None):
        # Each alias is a (regex, result, pattern_sep, result_sep) tuple.
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex to
        # match the whole string, which we don't want.  Depending on the
        # Python version the anchor is a trailing "$", a "\Z" followed by a
        # flags group, or a "\Z" (or "\z") at the very end.  Only strip it in
        # those positions, so a literal in the pattern is never disturbed.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(\?|$)", r"\1", regex_pat)
        if regex_pat.endswith("$"):
            regex_pat = regex_pat[:-1]

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  Older Pythons escape the slash as "\/", newer
        # ones leave it bare, so handle both.  Walk the regex token by token
        # so other escapes and character classes pass through untouched.
        def either_separator(match):
            """Turn a slash token into a class matching both separators."""
            token = match.group(0)
            if token in ("/", r"\/"):
                return r"[\\/]"
            return token

        regex_pat = re.sub(
            r"(?s)\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|/",
            either_separator,
            regex_pat,
        )
        # We want case-insensitive matching, so add that flag.
        regex = re.compile("(?i)" + regex_pat)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.  If this object has a locator, the mapped path is
        canonicalized through it before being returned.

        """
        for regex, result, _pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if not m:
                continue
            # Swap only the matched root at the front of the path.  Using
            # str.replace() on the matched text would also rewrite any later
            # repetition of it deeper in the path.
            new = result + path[m.end():]
            # The pattern ends with a separator, so the last matched
            # character is the separator this particular path uses, which
            # may differ from the one written in the pattern.
            path_sep = m.group(0)[-1:]
            if path_sep in ("/", "\\") and path_sep != result_sep:
                new = new.replace(path_sep, result_sep)
            if self.locator:
                new = self.locator.canonical_filename(new)
            return new
        return path
207
208
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    A directory only counts as importable if it contains an `__init__.py`;
    other directories are skipped along with everything beneath them.

    """
    for here, subdirs, names in os.walk(dirname, topdown=True):
        if '__init__.py' in names:
            for name in names:
                if not fnmatch.fnmatch(name, "*.py"):
                    continue
                yield os.path.join(here, name)
        else:
            # Not a package: empty the list in place so os.walk doesn't
            # descend any further from here.
            subdirs[:] = []
220