1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5#                     The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10#===------------------------------------------------------------------------===#
11
12r"""                                                                             
13clang-format git integration                                                     
14============================                                                     
15                                                                                 
16This file provides a clang-format integration for git. Put it somewhere in your  
17path and ensure that it is executable. Then, "git clang-format" will invoke      
18clang-format on the changes in current files or a specific commit.               
19                                                                                 
20For further details, run:                                                        
21git clang-format -h                                                              
22                                                                                 
23Requires Python 2.7                                                              
24"""               
25
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# One-line synopsis handed to argparse as the usage string.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description handed to argparse.  The git-config keys listed at the end
# are the ones main() looks up (in lower case) to seed option defaults.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the first line number and `count` the
# number of lines, as parsed from a diff hunk header.
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
  """Command-line entry point.

  Parses the arguments (option defaults come from the git configuration),
  works out which lines changed, runs clang-format on those lines and then
  either prints the resulting diff (--diff) or applies it to the working
  directory.  Exits through die() on invalid argument combinations.

  Prints use the single-argument parenthesized form so that the output is
  byte-identical under Python 2.7 while the function also parses on Python 3.
  """
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Check the upper bound first: it used to live in the `else` branch of the
  # `> 1` test, where it could never fire.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) == 2 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Remember every file in the diff so the ones dropped by the extension
    # filter can be reported.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  if len(commits) > 1:
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)
180
181
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless listed in `non_string_options`,
  which is a dictionary mapping option name (in lower case) to either "--bool"
  or "--int"; such options are re-read through `git config <type> <name>` so
  that git normalizes the value.

  Keys are used exactly as `git config --list --null` prints them.  An entry
  that has no value (a key written without '=' in the config file) is reported
  as the string 'true'."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    if '\n' in entry:
      # With --null, key and value are separated by a newline.
      name, value = entry.split('\n', 1)
    else:
      # A valueless key is printed without the newline separator; unpacking
      # the split would raise ValueError.  Git treats such keys as true.
      name, value = entry, 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
198
199
def interpret_args(args, dash_dash, default_commit):
  """Split the positional arguments into (commits, files).

  `args` holds the positionals found before any "--"; `dash_dash` holds the
  "--" itself plus everything after it (empty if there was no "--").

  With a "--", everything in `args` must be a commit (or tag) and everything
  after the "--" is a file.  Without it, leading arguments are consumed as
  commits for as long as they resolve to one, and the rest are files.  When no
  commit is found, `[default_commit]` is used.  Note that `args` is consumed in
  place in the no-"--" case."""
  if dash_dash:
    commits = args if args else [default_commit]
    for candidate in commits:
      kind = get_object_type(candidate)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % candidate)
      else:
        die("'%s' is a %s, but a commit was expected" % (candidate, kind))
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  commits = []
  # Peel revisions off the front; the first non-revision starts the files.
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
236
237
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Exits the script if `value` is neither, or if it names a git object that is
  not a commit or tag."""
  # run() exits the script when the command fails, which is what happens when
  # `value` is ambiguous (neither a commit nor a file); git's own message is
  # forwarded to stderr.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
250
251
def get_object_type(value):
  """Return the type name `git cat-file -t` reports for `value`, stripped of
  surrounding whitespace, or None when the command exits with a non-zero
  status (i.e. `value` is not a valid git object)."""
  proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  output = proc.communicate()[0]
  return output.strip() if proc.returncode == 0 else None
261
262
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output through extract_lines().

  Returns the mapping produced by extract_lines().  Exits with status 2 if
  the diff process reports failure."""
  proc = compute_diff(commits, files)
  ranges_by_file = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return ranges_by_file
273
274
def compute_diff(commits, files):
  """Start the git process that produces the diff for `commits`.

  Returns the subprocess object; its `stdout` file object yields a patch with
  the differences between the working directory and the commit when a single
  commit is given (via `git diff-index`), or between the two commits when more
  than one is given (via `git diff-tree`).  The patch is limited to `files`
  if non-empty and uses zero context lines.  The child's stdin is already
  closed on return."""
  tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  argv = ['git', tool, '-p', '-U0'] + commits
  argv.append('--')
  argv.extend(files)
  proc = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  proc.stdin.close()
  return proc
290
291
def extract_lines(patch_file):
  """Collect the changed line ranges from a patch.

  `patch_file` is an iterable of lines in unidiff format produced with
  ``-U0`` (zero lines of context).  The return value is a dict mapping each
  filename (taken from its ``+++`` header, first path component dropped) to a
  list of line `Range`s on the new side of the diff.  Hunks that add no lines
  (count of zero) are omitted."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  ranges_by_file = {}
  for line in patch_file:
    header = file_header.search(line)
    if header:
      filename = header.group(1).rstrip('\r\n')
    hunk = hunk_header.search(line)
    if not hunk:
      continue
    start = int(hunk.group(1))
    # An omitted count in a hunk header means a single line.
    count = int(hunk.group(3)) if hunk.group(3) else 1
    if count > 0:
      ranges_by_file.setdefault(filename, []).append(Range(start, count))
  return ranges_by_file
315
316
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is modified in place; nothing is returned.  Keys without a
  '.' are always removed.  The extension is the text after the last '.' and
  is compared case-insensitively.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period."""
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys: deleting entries while iterating a
  # live key view raises RuntimeError on Python 3.
  for filename in list(dictionary):
    base_ext = filename.rsplit('.', 1)
    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
      del dictionary[filename]
327
328
def cd_to_toplevel():
  """Make the repository's top-level directory (as reported by
  `git rev-parse --show-toplevel`) the current working directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
333
334
def create_tree_from_workdir(filenames):
  """Build a git tree containing `filenames` as they currently are in the
  working directory.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
340
341
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps each filename to the line ranges to format.  If
  `revision` is given, file contents and modes are taken from that revision;
  otherwise they are taken from the working directory.  `binary` and `style`
  are forwarded to clang_format_to_blob().

  Exits through die() if the file's mode cannot be looked up in `revision`.

  Returns the object ID (SHA-1) of the created tree."""
  def index_info_generator():
    # Yields one "<mode> <sha1>\t<filename>" record per formatted file.
    for filename, line_ranges in changed_lines.items():
      if revision:
        git_metadata_cmd = ['git', 'ls-tree',
                            '%s:%s' % (revision, os.path.dirname(filename)),
                            os.path.basename(filename)]
        git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
        stdout = git_metadata.communicate()[0]
        fields = stdout.split()
        # Report a clean error rather than an IndexError when ls-tree fails
        # or prints nothing for this path.
        if git_metadata.returncode != 0 or not fields:
          die('`%s` failed' % ' '.join(git_metadata_cmd))
        mode_bits = int(fields[0], 8)
      else:
        mode_bits = os.stat(filename).st_mode
      # Format the octal mode by hand: oct() yields '0100644' on Python 2 but
      # '0o100644' on Python 3, and only the former is the form handed to
      # `git update-index --index-info` by the original code.
      mode = '0%o' % mode_bits
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)
  return create_tree(index_info_generator(), '--index-info')
365
366
def create_tree(input_lines, mode):
  """Create a tree object from the given input and return its object ID.

  `mode` selects how `git update-index` interprets `input_lines`:
  '--stdin' means each item is a filename; '--index-info' means each item is
  a record suitable for "git update-index --index-info", such as
  "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid.

  The work is done against a temporary index file.  Exits through die() if
  `git update-index` fails."""
  assert mode in ('--stdin', '--index-info')
  update_index_cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    proc = subprocess.Popen(update_index_cmd, stdin=subprocess.PIPE)
    feed = proc.stdin
    # -z: records are NUL-terminated.
    for entry in input_lines:
      feed.write('%s\0' % entry)
    feed.close()
    status = proc.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(update_index_cmd))
    return run('git', 'write-tree')
385
386
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Format `filename` with clang-format and store the output as a git blob.

  Only the lines covered by `line_ranges` (pairs of start line and line
  count) are passed to clang-format for formatting.  When `revision` is not
  None the file contents are read from that revision and piped into
  clang-format; otherwise clang-format reads the file from the working
  directory.

  Exits through die() if the clang-format executable cannot be found or if
  any process in the pipeline fails.

  Returns the object ID (SHA-1) of the created blob."""
  format_cmd = [binary]
  if style:
    format_cmd.append('-style='+style)
  for first_line, num_lines in line_ranges:
    # clang-format takes an inclusive first:last pair.
    format_cmd.append('-lines=%s:%s' % (first_line, first_line+num_lines-1))

  show_cmd = None
  show_proc = None
  if revision:
    format_cmd.append('-assume-filename='+filename)
    show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    show_proc = subprocess.Popen(show_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
    show_proc.stdin.close()
    format_input = show_proc.stdout
  else:
    format_cmd.append(filename)
    format_input = subprocess.PIPE

  try:
    format_proc = subprocess.Popen(format_cmd, stdin=format_input,
                                   stdout=subprocess.PIPE)
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
    die('cannot find executable "%s"' % binary)

  # Close our end of whatever feeds clang-format: the pipe we just created
  # (working-directory case) or our copy of git's stdout (revision case).
  if show_proc is None:
    format_proc.stdin.close()
  else:
    format_input.close()

  hash_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_proc = subprocess.Popen(hash_cmd, stdin=format_proc.stdout,
                               stdout=subprocess.PIPE)
  format_proc.stdout.close()
  blob_id = hash_proc.communicate()[0]

  # Check the pipeline back to front, the same order as it is drained.
  if hash_proc.returncode != 0:
    die('`%s` failed' % ' '.join(hash_cmd))
  if format_proc.wait() != 0:
    die('`%s` failed' % ' '.join(format_cmd))
  if show_proc and show_proc.wait() != 0:
    die('`%s` failed' % ' '.join(show_cmd))
  return blob_id.rstrip('\r\n')
435
436
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a freshly created temporary
  index (populated from `tree`, see create_temporary_index) for the duration
  of the `with` body, then restores the previous environment and deletes the
  temporary file."""
  env_key = 'GIT_INDEX_FILE'
  temp_path = create_temporary_index(tree)
  saved = os.environ.get(env_key)
  os.environ[env_key] = temp_path
  try:
    yield
  finally:
    # Put the environment back exactly as it was before entering.
    if saved is not None:
      os.environ[env_key] = saved
    else:
      del os.environ[env_key]
    os.remove(temp_path)
452
453
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory and return its
  path.

  The index is filled from `tree` when one is given; with `tree` set to None
  an empty index is created."""
  path = os.path.join(run('git', 'rev-parse', '--git-dir'),
                      temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+path, source)
  return path
465
466
def print_diff(old_tree, new_tree):
  """Show the user the diff from `old_tree` to `new_tree` on stdout.

  Raises subprocess.CalledProcessError if `git diff` exits non-zero."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # a person will read the output, and only the porcelain command provides
  # niceties such as color and pagination.
  #
  # `new_tree` holds nothing but the files that were modified, so without the
  # "modified only" filter every other file would be shown as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
478
479
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are unstaged changes in the affected files
  and not `force`.  If `patch_mode`, runs `git checkout --patch` to select
  hunks interactively; otherwise the files are checked out from `new_tree`
  unconditionally.

  Returns the list of files that differ (as modifications) between
  `old_tree` and `new_tree`; the list is empty when there are none."""
  diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                    '--name-only', old_tree, new_tree)
  # Splitting empty output yields [''], so drop empty names.
  changed_files = [name for name in diff_output.rstrip('\0').split('\0')
                   if name]
  # With no paths, `git diff-files` would report on the whole work tree, so
  # the check is skipped entirely when nothing changed.
  if not force and changed_files:
    # '--' keeps file names that start with '-' from being read as options.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      sys.stderr.write('The following files would be modified but '
                       'have unstaged changes:\n')
      sys.stderr.write('%s\n' % unstaged_files)
      sys.stderr.write('Please commit, stage, or stash them first.\n')
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
510
511
def run(*args, **kwargs):
  """Run the command `args` and return what it wrote to stdout.

  Keyword arguments:
    stdin   -- data sent to the command's standard input (default '').
    verbose -- if true (default), prefix forwarded stderr output / failures
               with a line naming the command.
    strip   -- if true (default), strip trailing CR/LF from the result.

  Anything the command prints to stderr is forwarded to our stderr.  If the
  command exits with a non-zero status, the script exits with status 2.
  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  if kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" %
                    next(iter(kwargs)))
  proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  out, err = proc.communicate(input=stdin)

  # Failure: report and terminate the whole script.
  if proc.returncode != 0:
    if verbose:
      print >>sys.stderr, '`%s` returned %s' % (' '.join(args),
                                                proc.returncode)
    if err:
      print >>sys.stderr, err.rstrip()
    sys.exit(2)

  # Success: still surface anything the command said on stderr.
  if err:
    if verbose:
      print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
    print >>sys.stderr, err.rstrip()
  return out.rstrip('\r\n') if strip else out
534
535
def die(message):
  """Write 'error: <message>' to stderr and exit the script with status 2.

  Uses sys.stderr.write rather than the Python 2 print-chevron statement so
  the output is the same on Python 2.7 and the function also works on
  Python 3."""
  # The one-element tuple keeps '%' from unpacking a tuple-valued message.
  sys.stderr.write('error: %s\n' % (message,))
  sys.exit(2)
539
540
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
543