"""File wrangling."""

import fnmatch
import os
import re
import sys

from coverage.backward import to_string
from coverage.misc import CoverageException


class FileLocator(object):
    """Understand how filenames work."""

    def __init__(self):
        # The absolute path to our current directory, with a trailing
        # separator so prefix tests only match whole directory names.
        self.relative_dir = self.abs_file(os.curdir) + os.sep

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.
        self.canonical_filename_cache = {}

    def abs_file(self, filename):
        """Return the absolute normalized form of `filename`.

        Symbolic links are resolved, the path is made absolute, and its case
        is normalized with `os.path.normcase`.

        """
        return os.path.normcase(os.path.abspath(os.path.realpath(filename)))

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  Filenames that are not under that
        directory are returned unchanged.

        """
        if filename.startswith(self.relative_dir):
            # Strip only the leading directory: a later repetition of the
            # same text inside the path must be left alone.
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.
        Results are cached per input `filename`.

        """
        if filename not in self.canonical_filename_cache:
            f = filename
            if os.path.isabs(f) and not os.path.exists(f):
                # An absolute path that isn't on disk and isn't inside a
                # zip or egg: fall back to searching for its basename.
                if self.get_zip_data(f) is None:
                    f = os.path.basename(f)
            if not os.path.isabs(f):
                # Look for the file in the current directory, then along
                # sys.path, and use the first place it exists.
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    g = os.path.join(path, f)
                    if os.path.exists(g):
                        f = g
                        break
            cf = self.abs_file(f)
            self.canonical_filename_cache[filename] = cf
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the string data read from the zip file, or None if no zip file
        could be found or `filename` isn't in it.  The data returned will be
        an empty string if the file is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # Split on the first marker only, so that the path inside
                # the archive is kept whole even if it contains the marker
                # text again.
                parts = filename.split(marker, 1)
                try:
                    zi = zipimport.zipimporter(parts[0]+marker[:-1])
                except zipimport.ZipImportError:
                    continue
                try:
                    data = zi.get_data(parts[1])
                except IOError:
                    continue
                return to_string(data)
        return None


class TreeMatcher(object):
    """A matcher for files in a tree."""

    def __init__(self, directories):
        # Copy the list so later add() calls don't change the caller's list.
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?"""
        for d in self.dirs:
            if fpath.startswith(d):
                if fpath == d:
                    # This is the same file!
                    return True
                # fpath is strictly longer than d here, so the index is
                # safe.  Requiring a separator keeps "/a/bc" from matching
                # the tree "/a/b".
                if fpath[len(d)] == os.sep:
                    # This is a file in the directory
                    return True
        return False


class FnmatchMatcher(object):
    """A matcher for files by filename pattern."""

    def __init__(self, pats):
        # Copy the list so the caller's list isn't shared.
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        for pat in self.pats:
            if fnmatch.fnmatch(fpath, pat):
                return True
        return False


def sep(s):
    """Find the path separator used in this string, or os.sep if none."""
    sep_match = re.search(r"[\\/]", s)
    if sep_match:
        return sep_match.group(0)
    return os.sep


class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.

    """
    def __init__(self, locator=None):
        # List of (regex, result, pattern_sep, result_sep) tuples, in the
        # order they were added.
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex
        # to match the whole string, which we don't want.  Depending on the
        # Python version the anchor is "$", or "\Z" / "\z" either at the
        # very end or just before a trailing flags group, so remove all of
        # those forms.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(|$)", r"\1", regex_pat)
        if regex_pat.endswith("$"):
            regex_pat = regex_pat[:-1]
        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  Older Pythons escape the slash as "\/", newer
        # ones leave it bare, so accept both spellings.
        regex_pat = re.sub(r"\\?/", lambda m: r"[\\/]", regex_pat)
        # We want case-insensitive matching, so add that flag.
        regex = re.compile("(?i)" + regex_pat)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.  If a locator was given to the constructor, the mapped
        path is canonicalized with it before being returned.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if not m:
                continue
            # regex.match anchors at the start of the path, so swap just
            # that leading root.  Identical text further along the path
            # must not be rewritten.
            new = result + path[m.end():]
            # Convert both the pattern's separator and the path's own
            # separator (they can differ, since a "/" in the pattern matches
            # either kind) to the result's separator.
            for old_sep in (pattern_sep, sep(path)):
                if old_sep != result_sep:
                    new = new.replace(old_sep, result_sep)
            if self.locator:
                new = self.locator.canonical_filename(new)
            return new
        return path


def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively."""
    for dirpath, dirnames, filenames in os.walk(dirname, topdown=True):
        if '__init__.py' not in filenames:
            # If a directory doesn't have __init__.py, then it isn't
            # importable and neither are its files.  Emptying dirnames in
            # place stops os.walk from descending any further.
            del dirnames[:]
            continue
        for filename in filenames:
            if fnmatch.fnmatch(filename, "*.py"):
                yield os.path.join(dirpath, filename)