1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5#                     The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10#===------------------------------------------------------------------------===#
11
12r"""                                                                             
13clang-format git integration                                                     
14============================                                                     
15                                                                                 
16This file provides a clang-format integration for git. Put it somewhere in your  
17path and ensure that it is executable. Then, "git clang-format" will invoke      
18clang-format on the changes in current files or a specific commit.               
19                                                                                 
20For further details, run:                                                        
21git clang-format -h                                                              
22                                                                                 
23Requires Python 2.7 or Python 3                                                  
24"""               
25
26from __future__ import print_function
27import argparse
28import collections
29import contextlib
30import errno
31import os
32import re
33import subprocess
34import sys
35
# Usage synopsis handed to argparse in place of its auto-generated one.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Description shown in the help output.  The git-config names listed at the end
# must match the keys that main() looks up (git reports them in lower case).
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the first line number as reported by the
# diff hunk header and `count` is the number of lines in the run.
Range = collections.namedtuple('Range', 'start, count')
59
60
def main():
  """Parse the command line, run clang-format on the changed lines, and either
  print the resulting diff (--diff) or apply it to the working directory.

  Exits with status 2 (via die() or the helpers) on any error."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Reject too many commits first; otherwise the count would never be checked
  # once more than one commit has been given.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) > 1 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Remember every file in the diff so that the ones dropped by the
    # extension filter can be reported.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The file names taken from the diff are used as-is from the top level of
  # the repository, so we must cd there before accessing those files.
  cd_to_toplevel()
  if len(commits) > 1:
    # Two commits: format the files as they exist in the second commit.
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    # Zero or one commit: format the files in the working directory.
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)
181
182
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless in `non_string_options`, which
  is a dictionary mapping option name (in lower case) to either "--bool" or
  "--int".  For those options the value is re-read through
  `git config <type-flag> <name>` so that git normalizes it.

  A setting that has no value at all (written without '=') is reported by
  `git config --list --null` without the newline separator; such a setting is
  stored with the value 'true'."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  # With --null, entries are NUL-terminated and the key is separated from the
  # value by a newline.
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    if '\n' in entry:
      name, value = entry.split('\n', 1)
    else:
      # No separator: the key was set without a value, which git treats as a
      # boolean true.
      name, value = entry, 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
199
200
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  The caller is expected to have moved "--" and everything after it out of
  `args` and into `dash_dash`.

  When `dash_dash` is non-empty, everything in `args` is treated as a commit
  (each one is validated, and the program dies on a non-commit) and the files
  are whatever follows the "--".  Otherwise `args` is scanned from the left:
  leading arguments that resolve to revisions are commits and the remainder
  are files.  Whenever no commit is found, `[default_commit]` is used."""
  if not dash_dash and not args:
    # Nothing on the command line at all.
    return [default_commit], []

  if dash_dash:
    commits = args if len(args) != 0 else [default_commit]
    for commit in commits:
      kind = get_object_type(commit)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % commit)
      else:
        die("'%s' is a %s, but a commit was expected" % (commit, kind))
    return commits, dash_dash[1:]

  # No "--": peel revisions off the front of `args` until a file is hit.
  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
237
238
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Dies if `value` is a git object that is neither a commit nor a tag."""
  # `git rev-parse` is run only for its failure mode: when `value` is
  # ambiguous (neither a commit nor a file) it exits with an error, and run()
  # then terminates the program with git's own message.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
251
252
def get_object_type(value):
  """Return the type of git object `value` as reported by `git cat-file -t`,
  or None when git does not accept it as a valid object."""
  proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  out, _ = proc.communicate()
  if proc.returncode == 0:
    return convert_string(out.strip())
  return None
262
263
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output to extract_lines().

  Returns the mapping produced by extract_lines().  Exits with status 2 if the
  diff process fails."""
  proc = compute_diff(commits, files)
  result = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return result
274
275
def compute_diff(commits, files):
  """Start git to compute the diff for `commits` and return the process.

  The returned subprocess's `stdout` yields a patch.  With a single commit the
  patch is produced by `git diff-index` for that commit; with more than one it
  is produced by `git diff-tree` for the given commits.  The patch is limited
  to `files` when that list is non-empty, and is generated with zero lines of
  context."""
  git_tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  cmd = ['git', git_tool, '-p', '-U0'] + commits
  cmd.append('--')
  cmd.extend(files)
  proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # git gets no input; close its stdin right away.
  proc.stdin.close()
  return proc
291
292
def extract_lines(patch_file):
  """Collect the changed line ranges from the patch read from `patch_file`.

  `patch_file` is iterated line by line and must contain a unified diff made
  with ``-U0`` (zero lines of context).

  Returns a dict mapping each filename to a list of `Range`s, one per hunk
  that leaves at least one line in the new version of the file."""
  file_header = r'^\+\+\+\ [^/]+/(.*)'
  hunk_header = r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?'
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    header = re.search(file_header, text)
    if header:
      # Every hunk that follows belongs to this file until the next header.
      filename = header.group(1).rstrip('\r\n')
    hunk = re.search(hunk_header, text)
    if not hunk:
      continue
    start_line = int(hunk.group(1))
    # An omitted count in the hunk header means a single line.
    line_count = int(hunk.group(3)) if hunk.group(3) else 1
    if line_count > 0:
      ranges_by_file.setdefault(filename, []).append(
          Range(start_line, line_count))
  return ranges_by_file
317
318
def filter_by_extension(dictionary, allowed_extensions):
  """Remove, in place, each key of `dictionary` whose extension is not allowed.

  `allowed_extensions` must be a collection of lowercase file extensions
  without the period.  A key containing no period at all is kept only when the
  empty string is one of the allowed extensions."""
  allowed = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys since entries are deleted on the way.
  for filename in list(dictionary):
    parts = filename.rsplit('.', 1)
    if len(parts) == 1:
      keep = '' in allowed
    else:
      keep = parts[1].lower() in allowed
    if not keep:
      del dictionary[filename]
331
332
def cd_to_toplevel():
  """Make the top-level directory of the git repository the current
  directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
337
338
def create_tree_from_workdir(filenames):
  """Build a git tree out of the working-directory versions of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
344
345
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps each filename to the line ranges to format.  When
  `revision` is given, the file mode is taken from that revision's tree
  listing; otherwise it is taken from the file in the working directory.
  `binary` and `style` are forwarded to clang_format_to_blob().

  Returns the object ID (SHA-1) of the created tree."""
  def file_mode(filename):
    # Octal mode string for `filename`, in the form git expects.
    if revision:
      ls_tree_cmd = ['git', 'ls-tree',
                     '%s:%s' % (revision, os.path.dirname(filename)),
                     os.path.basename(filename)]
      ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
      listing = ls_tree.communicate()[0]
      # The mode is the first whitespace-separated field of the listing.
      mode = oct(int(listing.split()[0], 8))
    else:
      mode = oct(os.stat(filename).st_mode)
    # Adjust python3 octal format so that it matches what git expects
    if mode.startswith('0o'):
      mode = '0' + mode[2:]
    return mode

  def index_entries():
    # Yield one "<mode> <blob>\t<filename>" record per formatted file.
    try:
      pairs = changed_lines.iteritems()  # Python 2
    except AttributeError:
      pairs = changed_lines.items()  # Python 3
    for filename, line_ranges in pairs:
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_entries(), '--index-info')
377
378
def create_tree(input_lines, mode):
  """Write a tree object built from `input_lines` and return its ID.

  `mode` selects how `git update-index` reads its input.  With '--stdin',
  `input_lines` must be an iterable of filenames.  With '--index-info', it
  must be an iterable of records suitable for "git update-index
  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
  mode is invalid.

  The index is populated in a temporary index file, so the user's index is
  left alone."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for entry in input_lines:
      # -z makes git expect NUL-terminated records.
      updater.stdin.write(to_bytes('%s\0' % entry))
    updater.stdin.close()
    status = updater.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(cmd))
    return run('git', 'write-tree')
397
398
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Format one file with clang-format and store the output as a git blob.

  When `revision` is not None, the contents of `filename` in that revision are
  piped into clang-format; when it is None, clang-format reads `filename` from
  the working directory.  Only the lines covered by `line_ranges` are passed
  to clang-format as ranges to format.

  Dies if `binary` cannot be found or if any of the involved commands fails.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.append('-style='+style)
  for start_line, line_count in line_ranges:
    # -lines takes an inclusive first:last pair.
    clang_format_cmd.append(
        '-lines=%s:%s' % (start_line, start_line+line_count-1))

  if revision:
    # Feed the blob from `revision` through a pipe; clang-format still needs
    # the file name.
    clang_format_cmd.append('-assume-filename='+filename)
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    clang_format_cmd.append(filename)
    git_show = None
    clang_format_stdin = subprocess.PIPE

  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  if git_show is None:
    # A fresh pipe was requested above; pick up the end that Popen created.
    clang_format_stdin = clang_format.stdin
  # Close our copy of clang-format's input in either case.
  clang_format_stdin.close()

  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  # hash-object holds its own handle on the pipe now.
  clang_format.stdout.close()
  blob_output = hash_object.communicate()[0]

  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(blob_output).rstrip('\r\n')
447
448
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index file for
  the duration of the block.

  The index is created by create_temporary_index(tree).  On exit the previous
  value of GIT_INDEX_FILE is restored (or the variable is removed if it was
  not set before) and the temporary file is deleted."""
  index_path = create_temporary_index(tree)
  saved_index = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved_index is not None:
      os.environ['GIT_INDEX_FILE'] = saved_index
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
464
465
def create_temporary_index(tree=None):
  """Create the temporary index file inside the git directory and return its
  path.

  The index is filled from `tree` when one is given; with `tree` set to None
  an empty index is created."""
  index_path = os.path.join(run('git', 'rev-parse', '--git-dir'),
                            temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+index_path, source)
  return index_path
477
478
def print_diff(old_tree, new_tree):
  """Show the differences between `old_tree` and `new_tree` on stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # a person is going to read the output, and only the porcelain command does
  # nice things like color and pagination.
  #
  # `new_tree` holds nothing but the files that were modified.  Without the
  # filter restricting the output to modified files, every unmodified file
  # would be reported as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
490
491
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are unstaged changes in the affected files
  and not `force`.  If `patch_mode`, runs `git checkout --patch` to select
  hunks interactively; otherwise the files are checked out from `new_tree`
  unconditionally.

  Returns the list of files that differ between `old_tree` and `new_tree`."""
  names = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
              '--name-only', old_tree, new_tree)
  # Names are NUL-terminated; dropping empty pieces also turns empty output
  # into an empty list rather than a list holding one empty file name.
  changed_files = [name for name in names.split('\0') if name]
  if not force and changed_files:
    # '--' keeps a file name that starts with '-' from being read as an
    # option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
                'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
522
523
def run(*args, **kwargs):
  """Run the command `args` and return its standard output as a string.

  Keyword arguments:
    stdin:   data to feed to the command's standard input (default: none).
             Text is encoded as UTF-8 before being written.
    verbose: if true (the default), print a line naming the command before
             relaying its stderr or reporting its failure.
    strip:   if true (the default), strip trailing line terminators from the
             returned output.

  Anything the command wrote to stderr is relayed to our stderr.  If the
  command exits with a non-zero status, the program exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  # The pipes are binary, so text input must be encoded first.
  stdout, stderr = p.communicate(input=to_bytes(stdin))

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
550
551
def die(message):
  """Print `message` to stderr, prefixed with "error:", and exit with
  status 2."""
  print('error: %s' % (message,), file=sys.stderr)
  sys.exit(2)
555
556
def to_bytes(str_input):
    """Return `str_input` as binary data.

    Input that is already `bytes` is returned unchanged; anything else is
    encoded as UTF-8."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
562
563
def to_string(bytes_input):
    """Return `bytes_input` as a `str`.

    A `str` is returned unchanged; any other value is converted by calling
    its `encode('utf-8')` method."""
    if not isinstance(bytes_input, str):
        return bytes_input.encode('utf-8')
    return bytes_input
568
569
def convert_string(bytes_input):
    """Convert `bytes_input` to a `str`.

    The input is decoded as UTF-8 and passed through to_string().  If it has
    no `decode` method, or if the conversion fails with a UnicodeError, the
    result of `str(bytes_input)` is returned instead."""
    try:
        return to_string(bytes_input.decode('utf-8'))
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        return str(bytes_input)
577
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
580