#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrapper script to help run clang tools across Chromium code.

How to use this tool:
If you want to run the tool across all Chromium code:
run_tool.py <tool> <path/to/compiledb>

If you only want to run the tool across just chrome/browser and content/browser:
run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser

Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
information, which documents the entire automated refactoring flow in Chromium.

Why use this tool:
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost.

Unfortunately, if the work is simply sharded across multiple cores by running
multiple RefactoringTools, problems arise when they attempt to rewrite a file at
the same time. To work around that, clang tools that are run using this tool
should output edits to stdout in the following format:

==== BEGIN EDITS ====
r:<file path>:<offset>:<length>:<replacement text>
r:<file path>:<offset>:<length>:<replacement text>
...etc...
==== END EDITS ====

Any generated edits are applied once the clang tool has finished running
across Chromium, regardless of whether some instances failed or not.
"""

import collections
import functools
import multiprocessing
import os.path
import subprocess
import sys

try:
  # Public since Python 3.3.
  from shlex import quote as _ShellQuote
except ImportError:
  # Python 2.7 only exposes the helper through the (deprecated) pipes module.
  from pipes import quote as _ShellQuote


Edit = collections.namedtuple(
    'Edit', ('edit_type', 'offset', 'length', 'replacement'))


# Markers that delimit the edit list in a tool's stdout. They are byte strings
# because subprocess output is parsed as raw bytes (edit offsets are byte
# offsets into the edited file).
_BEGIN_EDITS_MARKER = b'==== BEGIN EDITS ===='
_END_EDITS_MARKER = b'==== END EDITS ===='


def _ToText(data):
  """Returns |data| as a native str.

  On Python 2, subprocess output already is a str and is returned untouched. On
  Python 3 it is bytes, which must be decoded before it can be joined with
  other paths or written to sys.stdout.

  Args:
    data: A str or bytes object.

  Returns:
    |data| itself if it is a str, otherwise |data| decoded as UTF-8 with
    undecodable bytes replaced.
  """
  if isinstance(data, str):
    return data
  return data.decode('utf-8', 'replace')


def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.

  Returns:
    A list with one path (as a native str) per line printed by `git ls-files`.
  """
  args = ['git', 'ls-files']
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  # Split the raw bytes first so that only real line terminators separate
  # entries, then convert each entry to text.
  return [_ToText(line) for line in output.splitlines()]


def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.
    stdout: The raw stdout (bytes) from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits. Lines between the
    markers that cannot be parsed are reported on stdout and skipped.

  Raises:
    ValueError: If the BEGIN/END EDITS markers are not present in |stdout|.
  """
  lines = stdout.splitlines()
  start_index = lines.index(_BEGIN_EDITS_MARKER)
  end_index = lines.index(_END_EDITS_MARKER)
  edits = collections.defaultdict(list)
  for line in lines[start_index + 1:end_index]:
    try:
      # Only split on the first four colons: the replacement text itself may
      # contain colons.
      edit_type, path, offset, length, replacement = line.split(b':', 4)
      # Tools encode newlines in the replacement text as NUL bytes so that
      # every edit fits on a single line.
      replacement = replacement.replace(b'\0', b'\n')
      # Normalize the file path emitted by the clang tool to be relative to the
      # current working directory.
      path = os.path.relpath(os.path.join(build_directory, _ToText(path)))
      edits[path].append(
          Edit(_ToText(edit_type), int(offset), int(length), replacement))
    except ValueError:
      print('Unable to parse edit: %s' % _ToText(line))
  return edits


def _ExecuteTool(toolname, build_directory, filename):
  """Executes the tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated edits are stored with the key "edits"
    in the dictionary.

    Otherwise, the filename and the output from stderr are associated with the
    keys "filename" and "stderr" respectively.
  """
  command = subprocess.Popen((toolname, '-p', build_directory, filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
  stdout, stderr = command.communicate()
  if command.returncode != 0:
    return {'status': False, 'filename': filename, 'stderr': _ToText(stderr)}
  try:
    edits = _ExtractEditsFromStdout(build_directory, stdout)
  except ValueError:
    # The tool exited cleanly but did not print the edit markers. Report this
    # file as failed instead of letting the exception escape the worker, which
    # would abort the whole run and discard every edit collected so far.
    return {'status': False,
            'filename': filename,
            'stderr': ('Tool output did not contain the edit markers.\n' +
                       _ToText(stderr))}
  return {'status': True, 'edits': edits}


class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping filenames to the edits collected by Run()."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool invocation failed."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every file in parallel and collects the results."""
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Every result has been consumed (or an error occurred), so the workers
      # are no longer needed; shut them down instead of leaking them.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing carriage return makes the next progress update overwrite
    # this one.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()


def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
      automatically reformat diffs to avoid style violations. Pass None if the
      clang-format step should be skipped.
  """
  edit_count = 0
  for k, v in edits.items():
    # Sort the edits and iterate through them in reverse order. Sorting allows
    # duplicate edits to be quickly skipped, while reversing means that
    # subsequent edits don't need to have their offsets updated with each edit
    # applied. A sorted copy is used so the caller's lists are left untouched.
    last_edit = None
    with open(k, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in sorted(v, reverse=True):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      f.seek(0)
      f.truncate()
      f.write(contents)
    if clang_format_diff_path:
      # Both paths are quoted since the command goes through the shell; an
      # unquoted helper path breaks as soon as the checkout directory contains
      # a space or a shell metacharacter.
      if subprocess.call('git diff -U0 %s | python %s -i -p1 -style=file ' % (
          _ShellQuote(k), _ShellQuote(clang_format_diff_path)),
                         shell=True) != 0:
        print('clang-format failed for %s' % k)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))


_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  Args:
    contents: A bytearray with the deletion already applied. Modified in place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  char_before = char_after = None
  # Look at the first non-whitespace byte before the deletion point.
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte in (ord(','), ord(':'), ord('('), ord('{')):
      char_before = chr(byte)
    break

  # Look at the first non-whitespace byte after the deletion point.
  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == ord(','):
      char_after = chr(byte)
    break

  if char_before:
    if char_after:
      # The element was followed by a comma: drop that trailing comma.
      del contents[offset:offset + right_trim_count]
    elif char_before in (',', ':'):
      # The element was the last one: drop the separator that preceded it.
      del contents[offset - left_trim_count:offset]


def main(argv):
  """Runs the clang tool over the repository and applies the generated edits.

  Args:
    argv: Command line arguments without the script name: the clang tool, the
      compile database directory and optional path filters.

  Returns:
    The process exit code: 0 on success, 1 on a usage error and 2 if the tool
    failed on at least one file.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print('  <clang tool> is the clang tool that should be run.')
    print('  <compile db> is the directory that contains the compile database')
    print('  <path 1> <path2> ... can be used to filter what files are edited')
    return 1

  clang_format_diff_path = os.path.join(
      os.path.dirname(os.path.realpath(__file__)),
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  if not os.path.isfile(clang_format_diff_path):
    clang_format_diff_path = None

  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
  dispatcher = _CompilerDispatcher(argv[0], argv[1],
                                   [f for f in filenames
                                    if os.path.splitext(f)[1] in extensions])
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if k in filenames},
              clang_format_diff_path)
  if dispatcher.failed_count != 0:
    return 2
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))