1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5#                     The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10#===------------------------------------------------------------------------===#
11
12r"""                                                                             
13clang-format git integration                                                     
14============================                                                     
15                                                                                 
16This file provides a clang-format integration for git. Put it somewhere in your  
17path and ensure that it is executable. Then, "git clang-format" will invoke      
18clang-format on the changes in current files or a specific commit.               
19                                                                                 
20For further details, run:                                                        
21git clang-format -h                                                              
22                                                                                 
23Requires Python 2.7                                                              
24"""               
25
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# One-line synopsis handed to argparse in place of its auto-generated usage.
usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'

# Long description shown by --help.  The setting names listed at the end must
# match the keys that main() looks up in the git configuration (git reports
# them in lower case, e.g. "clangformat.extensions").
desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD.  Changes are only applied to the working
directory.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''
48
# Basename of the temporary index file in which the output of clang-format is
# saved.  The file is created inside the repository's .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive lines beginning at line number `start`.
Range = collections.namedtuple('Range', ['start', 'count'])
55
56
def main():
  """Command-line entry point for "git clang-format".

  Loads option defaults from the git configuration, parses the command line,
  collects the line ranges that differ from the selected commit, keeps only
  files with an allowed extension, and runs clang-format on those ranges.  The
  formatted result is stored as a git tree and then either shown as a diff
  (--diff) or applied to the working directory."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'js',  # JavaScript
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  # 'ignored' exists only for the help text; its parsed value is never read.
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level remains.
  opts.verbose -= opts.quiet
  del opts.quiet

  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
  changed_lines = compute_diff_and_extract_lines(commit, files)
  if opts.verbose >= 1:
    # Remember every changed file so the ones filtered out can be reported.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      sys.stdout.write(
          'Ignoring changes in the following files (wrong extension):\n')
      for filename in ignored_files:
        sys.stdout.write('    %s\n' % filename)
    if changed_lines:
      sys.stdout.write('Running clang-format on the following files:\n')
      for filename in changed_lines:
        sys.stdout.write('    %s\n' % filename)
  if not changed_lines:
    sys.stdout.write('no modified files to format\n')
    return
  # The diff names files relative to the top of the repository rather than
  # the current directory, so we must cd there before accessing those files.
  cd_to_toplevel()
  old_tree = create_tree_from_workdir(changed_lines)
  new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                               binary=opts.binary,
                                               style=opts.style)
  if opts.verbose >= 1:
    sys.stdout.write('old tree: %s\n' % old_tree)
    sys.stdout.write('new tree: %s\n' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      sys.stdout.write('clang-format did not modify any files\n')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    # In patch mode the user has already seen each hunk, so the summary is
    # printed only when extra verbosity was requested.
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      sys.stdout.write('changed files:\n')
      for filename in changed_files:
        sys.stdout.write('    %s\n' % filename)
162
163
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless listed in `non_string_options`,
  which is a dictionary mapping option name (in lower case) to either "--bool"
  or "--int".  Such options are read again through
  `git config <type-flag> <name>` and that command's output is stored instead.

  A setting written without a value is listed by `git config --list --null`
  as a bare name; it is stored with the value 'true'."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    # With --null each entry is "<name>\n<value>".  A key that has no value at
    # all comes without the newline; git treats such a key as boolean true, so
    # record it that way instead of failing to unpack the pair.
    name, newline, value = entry.partition('\n')
    if not newline:
      value = 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
180
181
def interpret_args(args, dash_dash, default_commit):
  """Split the positional arguments into a commit and a list of files.

  `args` holds the positionals that appeared before any "--"; `dash_dash`
  holds the "--" itself plus everything after it (empty if there was none).

  With "--": at most one positional may precede it, and it (or
  `default_commit` if absent) must name a commit or tag; the files are
  whatever followed the "--".  Without "--": the first positional is the
  commit if disambiguate_revision() accepts it, otherwise every positional is
  a file and `default_commit` is used.

  Returns a (commit, files) pair."""
  if not dash_dash:
    if not args:
      return default_commit, []
    if disambiguate_revision(args[0]):
      return args[0], args[1:]
    return default_commit, args

  # An explicit "--" was given: anything before it can only be the commit.
  if len(args) > 1:
    die('at most one commit allowed; %d given' % len(args))
  commit = args[0] if args else default_commit
  object_type = get_object_type(commit)
  if object_type is None:
    die("'%s' is not a commit" % commit)
  if object_type not in ('commit', 'tag'):
    die("'%s' is a %s, but a commit was expected" % (commit, object_type))
  return commit, dash_dash[1:]
217
218
def disambiguate_revision(value):
  """Decide whether `value` names a revision or a file.

  Returns True for a commit or tag, and False when `value` is not a git
  object at all (so it is taken to be a file).  Exits with an error if
  `value` is some other kind of object or if `git rev-parse` rejects it."""
  # `git rev-parse` fails with an appropriate message of its own when `value`
  # is ambiguous (neither a commit nor a file); run() then ends the process.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
231
232
def get_object_type(value):
  """Return the type git reports for `value`, or None if it is not an object.

  The type is whatever `git cat-file -t` prints, with surrounding whitespace
  removed.  A non-zero exit status from git yields None."""
  git = subprocess.Popen(['git', 'cat-file', '-t', value],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured and discarded so git's complaint is not shown.
  output = git.communicate()[0]
  return output.strip() if git.returncode == 0 else None
242
243
def compute_diff_and_extract_lines(commit, files):
  """Run compute_diff() and pass its output through extract_lines().

  Returns whatever extract_lines() built from the diff.  Exits with status 2
  if the diff process finishes with a non-zero status."""
  proc = compute_diff(commit, files)
  result = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return result
254
255
def compute_diff(commit, files):
  """Start `git diff-index` against `commit` and return the subprocess object.

  The returned object's `stdout` file object produces a patch with the
  differences between the working directory and `commit`, limited to `files`
  when that is non-empty.  The patch has zero context lines.  The process's
  stdin is already closed on return; reading stdout and waiting for the
  process are left to the caller."""
  argv = ['git', 'diff-index', '-p', '-U0', commit, '--'] + list(files)
  diff = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  diff.stdin.close()
  return diff
267
268
def extract_lines(patch_file):
  """Extract the changed lines in `patch_file`.

  `patch_file` is an iterable of lines in unidiff format that must have been
  produced with ``-U0``, meaning zero lines of context.

  Returns a dict mapping filename to a list of line `Range`s, one per hunk
  that leaves at least one line in the new version of the file.  The filename
  comes from the "+++" header with its first path component (such as "b/")
  removed.  A hunk header that appears before any such header cannot be
  attributed to a file and is skipped."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  matches = {}
  filename = None
  for line in patch_file:
    match = file_header.search(line)
    if match:
      filename = match.group(1).rstrip('\r\n')
      continue
    match = hunk_header.search(line)
    if not match or filename is None:
      continue
    start_line = int(match.group(1))
    # "+N" without ",count" denotes a single line.
    line_count = int(match.group(3)) if match.group(3) else 1
    # A count of zero is a pure deletion: nothing is left to format.
    if line_count > 0:
      matches.setdefault(filename, []).append(Range(start_line, line_count))
  return matches
292
293
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is modified in place; nothing is returned.  A key's extension
  is the text after its last period, compared case-insensitively.  Keys with
  no period at all are always deleted.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period."""
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys: deleting entries while walking a
  # live key view raises RuntimeError on Python 3.
  for filename in list(dictionary):
    _, dot, extension = filename.rpartition('.')
    if not dot or extension.lower() not in allowed_extensions:
      del dictionary[filename]
304
305
def cd_to_toplevel():
  """Make the top level of the git repository the current directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
310
311
def create_tree_from_workdir(filenames):
  """Build a git tree holding the working-directory versions of `filenames`.

  Returns the object ID (SHA-1) of the new tree."""
  tree_id = create_tree(filenames, mode='--stdin')
  return tree_id
317
318
def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
                                      style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps each filename to the line ranges to format.  `binary`
  and `style` are passed on to clang_format_to_blob().

  Returns the object ID (SHA-1) of the created tree."""
  def index_info_generator():
    # Yields one "<mode> <blob>\t<filename>" entry per file for
    # `git update-index --index-info`.
    for filename, line_ranges in changed_lines.items():
      # '0%o' gives the same text as Python 2's oct() (e.g. "0100644").
      # Python 3's oct() would add an "0o" prefix, which is not the plain
      # octal form the index entry needs.
      mode = '0%o' % os.stat(filename).st_mode
      blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)
  return create_tree(index_info_generator(), '--index-info')
331
332
def create_tree(input_lines, mode):
  """Create a tree object by feeding `input_lines` to `git update-index`.

  `mode` says how the entries are to be read and must be one of:
    '--stdin'       every entry is a filename;
    '--index-info'  every entry is a value suitable for "git update-index
                    --index-info", such as
                    "<mode> <SP> <sha1> <TAB> <filename>".
  Any other mode is invalid.

  The work is done inside temporary_index_file().  Returns the output of
  `git write-tree`, i.e. the ID of the new tree."""
  assert mode in ('--stdin', '--index-info')
  update_index_cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(update_index_cmd, stdin=subprocess.PIPE)
    for entry in input_lines:
      # Because of -z, each entry is terminated by a NUL byte.
      updater.stdin.write('%s\0' % entry)
    updater.stdin.close()
    status = updater.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(update_index_cmd))
    return run('git', 'write-tree')
351
352
def clang_format_to_blob(filename, line_ranges, binary='clang-format',
                         style=None):
  """Format `filename` with clang-format and store the output as a git blob.

  Only the lines covered by `line_ranges`, a sequence of (start_line,
  line_count) pairs, are passed to clang-format via -lines.  `binary` is the
  clang-format executable, and `style`, if set, is passed as -style.

  Returns the object ID (SHA-1) of the created blob.  Exits with an error if
  the executable is missing or either command fails."""
  formatter_cmd = [binary, filename]
  if style:
    formatter_cmd.append('-style='+style)
  for first, count in line_ranges:
    # Each range becomes an inclusive first:last pair.
    formatter_cmd.append('-lines=%s:%s' % (first, first+count-1))
  try:
    formatter = subprocess.Popen(formatter_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
    die('cannot find executable "%s"' % binary)
  formatter.stdin.close()
  hash_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  # clang-format's output is piped straight into `git hash-object`.
  hasher = subprocess.Popen(hash_cmd, stdin=formatter.stdout,
                            stdout=subprocess.PIPE)
  # Close our handle on the pipe; hash-object was given it as its stdin.
  formatter.stdout.close()
  blob_id = hasher.communicate()[0]
  if hasher.returncode != 0:
    die('`%s` failed' % ' '.join(hash_cmd))
  if formatter.wait() != 0:
    die('`%s` failed' % ' '.join(formatter_cmd))
  return blob_id.rstrip('\r\n')
383
384
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index.

  The index is created by create_temporary_index(tree) before the body runs.
  On exit, whether or not the body raised, GIT_INDEX_FILE is put back to its
  previous value (or unset if it had none) and the temporary file is
  deleted."""
  index_path = create_temporary_index(tree)
  saved = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved is not None:
      os.environ['GIT_INDEX_FILE'] = saved
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
400
401
def create_temporary_index(tree=None):
  """Write a temporary index file and return its path.

  The file is named `temp_index_basename` and is placed in the directory
  reported by `git rev-parse --git-dir`.  If `tree` is not None, the index is
  read from that tree.  Otherwise an empty index is created."""
  git_dir = run('git', 'rev-parse', '--git-dir')
  index_path = os.path.join(git_dir, temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+index_path, source)
  return index_path
413
414
def print_diff(old_tree, new_tree):
  """Show the differences between the two trees on stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # a person is expected to read the output, and only the porcelain command
  # does nice things like color and pagination.
  diff_cmd = ['git', 'diff', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
421
422
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails out with exit status 2 if any file that would change has unstaged
  modifications and `force` is false.  If `patch_mode`, runs
  `git checkout --patch` to select hunks interactively.  Otherwise the files
  are checked out from `new_tree` unconditionally.

  Returns the list of filenames that differ between the two trees."""
  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # '--' keeps a filename that starts with '-' from being read as an option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      sys.stderr.write('The following files would be modified but '
                       'have unstaged changes:\n')
      sys.stderr.write('%s\n' % unstaged_files)
      sys.stderr.write('Please commit, stage, or stash them first.\n')
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
452
453
def run(*args, **kwargs):
  """Run a command and return what it wrote to standard output.

  The positional arguments form the command line.  Keyword arguments:
    stdin    text sent to the command's standard input (default '').
    verbose  if true (the default), print a line naming the command before
             relaying its stderr output or when reporting a failure.
    strip    if true (the default), remove trailing CR/LF characters from
             the returned output.

  If the command exits with status 0, its stdout is returned and anything it
  wrote to stderr is copied to our stderr.  On any other status the failure is
  reported on stderr and the process exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  if kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" %
                    next(iter(kwargs)))
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  stdout, stderr = p.communicate(input=stdin)
  if p.returncode == 0:
    if stderr:
      if verbose:
        sys.stderr.write('`%s` printed to stderr:\n' % ' '.join(args))
      sys.stderr.write('%s\n' % stderr.rstrip())
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    sys.stderr.write('`%s` returned %s\n' % (' '.join(args), p.returncode))
  if stderr:
    sys.stderr.write('%s\n' % stderr.rstrip())
  sys.exit(2)
476
477
def die(message):
  """Write "error: <message>" to stderr and exit with status 2."""
  # sys.stderr.write produces the same output as the Python-2-only
  # `print >>sys.stderr` form and is valid syntax on Python 3 as well.
  sys.stderr.write('error: %s\n' % (message,))
  sys.exit(2)
481
482
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
  main()
485