1#!/usr/bin/env python
2# Copyright (c) 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Wrapper script to help run clang tools across Chromium code.
7
8How to use this tool:
9If you want to run the tool across all Chromium code:
10run_tool.py <tool> <path/to/compiledb>
11
12If you only want to run the tool across just chrome/browser and content/browser:
13run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
14
15Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
16information, which documents the entire automated refactoring flow in Chromium.
17
18Why use this tool:
19The clang tool implementation doesn't take advantage of multiple cores, and if
20it fails mysteriously in the middle, all the generated replacements will be
21lost.
22
23Unfortunately, if the work is simply sharded across multiple cores by running
24multiple RefactoringTools, problems arise when they attempt to rewrite a file at
25the same time. To work around that, clang tools that are run using this tool
26should output edits to stdout in the following format:
27
28==== BEGIN EDITS ====
29r:<file path>:<offset>:<length>:<replacement text>
30r:<file path>:<offset>:<length>:<replacement text>
31...etc...
32==== END EDITS ====
33
34Any generated edits are applied once the clang tool has finished running
35across Chromium, regardless of whether some instances failed or not.
36"""
37
38import collections
39import functools
40import multiprocessing
41import os.path
42import pipes
43import subprocess
44import sys
45
46
# A single replacement emitted by a clang tool: the edit type tag, the byte
# offset and length of the range to replace, and the replacement text.
Edit = collections.namedtuple(
    'Edit', 'edit_type offset length replacement')
49
50
def _GetFilesFromGit(paths=None):
  """Lists the files tracked by git by running `git ls-files`.

  Args:
    paths: Optional iterable of path filters appended to the `git ls-files`
      command line. May contain multiple entries. When empty or None, no
      filter is passed.

  Returns:
    The lines of the command's stdout, one entry per listed file.
  """
  git_command = ['git', 'ls-files'] + list(paths or [])
  listing = subprocess.Popen(
      git_command, stdout=subprocess.PIPE).communicate()[0]
  return listing.splitlines()
63
64
def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file. Only the lines
  between the first '==== BEGIN EDITS ====' marker and the next
  '==== END EDITS ====' marker after it are parsed. If either marker is
  missing (for example because the tool printed no edit block at all), no
  edits are returned rather than raising, so that one such file cannot abort
  the whole run.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames (relative to the current working
    directory) to the list of Edit tuples for that file. Lines inside the
    edit block that cannot be parsed are reported on stdout and skipped.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    end_index = lines.index('==== END EDITS ====', start_index + 1)
  except ValueError:
    # No complete edit block in the output; treat it as "nothing to edit".
    return edits
  for line in lines[start_index + 1:end_index]:
    try:
      # Split at most four times so that any ':' inside the replacement text
      # is preserved.
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # NUL characters in the replacement stand in for newlines (presumably
      # because each edit has to fit on a single line of output).
      replacement = replacement.replace('\0', '\n')
      # Normalize the file path emitted by the clang tool to be relative to the
      # current working directory.
      path = os.path.relpath(os.path.join(build_directory, path))
      edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
    except ValueError:
      # Raised both for a wrong number of fields and for a non-integer offset
      # or length. Parenthesized so the line parses on Python 2 and 3 alike.
      print('Unable to parse edit: %s' % line)
  return edits
93
94
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and packages the outcome.

  This lives at module level (rather than on the dispatcher class) so that it
  can be pickled for the multiprocessing module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database; passed to
      the tool via its -p flag.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always contains the key "status" with a boolean value.

    When the tool exits with code 0, "status" is True and the edits parsed
    from its stdout are stored under the key "edits".

    Otherwise "status" is False, and the keys "filename" and "stderr" hold the
    processed filename and the tool's stderr output respectively.
  """
  process = subprocess.Popen(
      (toolname, '-p', build_directory, filename),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
  tool_stdout, tool_stderr = process.communicate()
  if process.returncode == 0:
    parsed_edits = _ExtractEditsFromStdout(build_directory, tool_stdout)
    return {'status': True, 'edits': parsed_edits}
  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
125
126
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel.

  Fans _ExecuteTool out over a multiprocessing.Pool, merges the edits returned
  for each file, and keeps running success/failure counts that are echoed to
  stdout as a progress line.
  """

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Its length is used as the
        denominator of the progress percentage.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping filenames to the edits collected so far."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool reported failure."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every file and accumulates the results.

    Results are consumed in completion order. The worker pool is always
    terminated and joined before returning, including when processing a
    result raises, so that no worker processes are left behind.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # On the normal path every result has already been received, so the
      # workers are idle; on the error path there is no point in letting the
      # remaining work finish.
      pool.terminate()
      pool.join()
    # The progress line ends with '\r'; move past it.
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Merges the edits of a successful result, or reports a failed one, and then
    rewrites the progress line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # Trailing '\r' so that the next progress update overwrites this line.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()
186
187
def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Each file is read fully, rewritten in memory and written back in place.
  Exact duplicate edits are applied only once. After an edit with an empty
  replacement (a pure deletion), _ExtendDeletionIfElementIsInList is given the
  chance to also remove an adjacent list separator. A summary line is printed
  once all files have been processed.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
      The lists are not modified.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
      automatically reformat diffs to avoid style violations. Pass None if the
      clang-format step should be skipped.
  """
  edit_count = 0
  for filename, file_edits in edits.items():
    # Sort the edits and iterate through them in reverse order. Sorting allows
    # duplicate edits to be quickly skipped, while reversing means that
    # subsequent edits don't need to have their offsets updated with each edit
    # applied. A sorted copy is used so the caller's list is left untouched.
    last_edit = None
    with open(filename, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in sorted(file_edits, reverse=True):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      # Rewrite the file from the start; truncate first in case the new
      # contents are shorter than the old ones.
      f.seek(0)
      f.truncate()
      f.write(contents)
    if clang_format_diff_path:
      # TODO(dcheng): python3.3 exposes this publicly as shlex.quote, but Chrome
      # uses python2.7. Use the deprecated interface until Chrome uses a newer
      # Python.
      # Both interpolated paths are quoted: the command goes through the shell,
      # so an unquoted helper path containing spaces would be split.
      if subprocess.call('git diff -U0 %s | python %s -i -p1 -style=file ' % (
          pipes.quote(filename), pipes.quote(clang_format_diff_path)),
                         shell=True) != 0:
        print('clang-format failed for %s' % filename)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
226
227
# Byte values that are skipped as whitespace when scanning around a deletion.
_WHITESPACE_BYTES = frozenset(ord(c) for c in '\t\n\r ')


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion to swallow a neighbouring list separator, if any.

  Refactoring tools often need to drop one element from a comma-separated
  list. A matcher callback may not know enough to remove the separator
  correctly; for example, another callback may delete the adjacent element, or
  every element may end up removed. This helper lets tools delete just the
  element and leaves the separator clean-up to the rewriter.

  The bytes on either side of the deletion point are scanned, skipping
  whitespace. If the first non-whitespace byte on the left is one of
  ',', ':', '(' or '{':
    - and the first non-whitespace byte on the right is ',', then the
      whitespace and comma on the right are removed;
    - otherwise, if the byte on the left is ',' or ':', that byte and the
      whitespace following it are removed.
  In every other case the contents are left untouched.

  Args:
    contents: A bytearray with the deletion already applied. Modified in
      place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  list_openers = (ord(','), ord(':'), ord('('), ord('{'))
  comma = ord(',')

  # Walk left from the deletion point to the first non-whitespace byte.
  char_before = None
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte not in _WHITESPACE_BYTES:
      if byte in list_openers:
        char_before = chr(byte)
      break

  # Walk right from the deletion point to the first non-whitespace byte.
  char_after = None
  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte not in _WHITESPACE_BYTES:
      if byte == comma:
        char_after = chr(byte)
      break

  if not char_before:
    return
  if char_after:
    del contents[offset:offset + right_trim_count]
  elif char_before in (',', ':'):
    del contents[offset - left_trim_count:offset]
271
272
def main(argv):
  """Runs a clang tool across the git-tracked sources and applies its edits.

  Args:
    argv: Command-line arguments without the program name: the clang tool to
      run, the directory containing the compile database, and then any number
      of path filters that are handed to `git ls-files`.

  Returns:
    The process exit code: 1 if fewer than two arguments were given (usage is
    printed), 2 if the tool failed on at least one file, 0 otherwise. Edits are
    applied even when some files failed.
  """
  if len(argv) < 2:
    # Parenthesized single-argument prints behave the same on Python 2.7 and
    # also parse on Python 3.
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print('  <clang tool> is the clang tool that should be run.')
    print('  <compile db> is the directory that contains the compile database')
    print('  <path 1> <path2> ... can be used to filter what files are edited')
    return 1

  clang_format_diff_path = os.path.join(
      os.path.dirname(os.path.realpath(__file__)),
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  # Skip the reformatting step when the helper script is not present.
  if not os.path.isfile(clang_format_diff_path):
    clang_format_diff_path = None

  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.m', '.mm'))
  dispatcher = _CompilerDispatcher(argv[0], argv[1],
                                   [f for f in filenames
                                    if os.path.splitext(f)[1] in extensions])
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if k in filenames},
              clang_format_diff_path)
  if dispatcher.failed_count != 0:
    return 2
  return 0
305
306
# Script entry point: forward the command-line arguments (minus the program
# name) to main() and use its return value as the process exit code.
if __name__ == '__main__':
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
309