checkperms.py revision f2477e01787aa58f445919b809d89e252beef54f
1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Makes sure files have the right permissions.
7
8Some developers have broken SCM configurations that flip the svn:executable
9permission on for no good reason. Unix developers who run ls --color will then
10see .cc files in green and get confused.
11
- For file extensions that must be executable, add it to EXECUTABLE_EXTENSIONS.
- For file extensions that must not be executable, add it to
  NON_EXECUTABLE_EXTENSIONS.
- To ignore all the files inside a directory, add it to IGNORED_PATHS.
- For file base names whose state is ambiguous and that should not be checked
  for a shebang, add them to IGNORED_FILENAMES.
18
Any file not matching the above will be opened and checked for a shebang or an
ELF header. If the result does not match the executable bit on the file, the
file will be flagged.
22
23Note that all directory separators must be slashes (Unix-style) and not
24backslashes. All directories should be relative to the source root and all
25file paths should be only lowercase.
26"""
27
28import logging
29import optparse
30import os
31import stat
32import string
33import subprocess
34import sys
35
36#### USER EDITABLE SECTION STARTS HERE ####
37
# Files with these extensions must have executable bit set.
#
# Extensions are listed without the leading dot and are compared as-is with
# the extension of the file being checked (see must_be_executable()).
#
# Case-sensitive.
EXECUTABLE_EXTENSIONS = (
  'bat',
  'dll',
  'dylib',
  'exe',
)
47
# These files must have executable bit set.
#
# Each entry is a whole file path; must_be_executable() looks up the
# lower-cased path of the file being checked in this tuple.
#
# Case-insensitive, lower-case only.
EXECUTABLE_PATHS = (
  'chrome/test/data/extensions/uitest/plugins/plugin.plugin/contents/'
      'macos/testnetscapeplugin',
  'chrome/test/data/extensions/uitest/plugins_private/plugin.plugin/contents/'
      'macos/testnetscapeplugin',
)
57
# Files with these extensions must not have the executable bit set. This is
# mainly a performance optimization as these files are not checked for shebang.
# The list was partially generated from:
# git ls-files | grep "\\." | sed 's/.*\.//' | sort | uniq -c | sort -b -g
#
# Extensions are listed without the leading dot and are compared as-is with
# the extension of the file being checked (see must_not_be_executable()).
#
# Case-sensitive.
NON_EXECUTABLE_EXTENSIONS = (
  '1',
  '3ds',
  'S',
  'am',
  'applescript',
  'asm',
  'c',
  'cc',
  'cfg',
  'chromium',
  'cpp',
  'crx',
  'cs',
  'css',
  'cur',
  'def',
  'der',
  'expected',
  'gif',
  'grd',
  'gyp',
  'gypi',
  'h',
  'hh',
  'htm',
  'html',
  'hyph',
  'ico',
  'idl',
  'java',
  'jpg',
  'js',
  'json',
  'm',
  'm4',
  'mm',
  'mms',
  'mock-http-headers',
  'nexe',
  'nmf',
  'onc',
  'pat',
  'patch',
  'pdf',
  'pem',
  'plist',
  'png',
  'proto',
  'rc',
  'rfx',
  'rgs',
  'rules',
  'spec',
  'sql',
  'srpc',
  'svg',
  'tcl',
  'test',
  'tga',
  'txt',
  'vcproj',
  'vsprops',
  'webm',
  'word',
  'xib',
  'xml',
  'xtb',
  'zip',
)
134
# These files must not have executable bit set.
#
# Each entry is a whole file path; must_not_be_executable() looks up the
# lower-cased path of the file being checked in this tuple.
#
# Case-insensitive, lower-case only.
NON_EXECUTABLE_PATHS = (
  'build/android/tests/symbolize/liba.so',
  'build/android/tests/symbolize/libb.so',
  'chrome/installer/mac/sign_app.sh.in',
  'chrome/installer/mac/sign_versioned_dir.sh.in',
  'chrome/test/data/components/ihfokbkgjpifnbbojhneepfflplebdkc/'
      'ihfokbkgjpifnbbojhneepfflplebdkc_1/a_changing_binary_file',
  'chrome/test/data/components/ihfokbkgjpifnbbojhneepfflplebdkc/'
      'ihfokbkgjpifnbbojhneepfflplebdkc_2/a_changing_binary_file',
  'chrome/test/data/extensions/uitest/plugins/plugin32.so',
  'chrome/test/data/extensions/uitest/plugins/plugin64.so',
  'chrome/test/data/extensions/uitest/plugins_private/plugin32.so',
  'chrome/test/data/extensions/uitest/plugins_private/plugin64.so',
  'courgette/testdata/elf-32-1',
  'courgette/testdata/elf-32-2',
  'courgette/testdata/elf-64',
)
155
# File names that are always whitelisted.  (These are mostly autoconf spew.)
#
# Lower-case only: is_ignored() lower-cases the path before comparing its base
# name with these entries, so an entry containing upper-case characters would
# never match.
IGNORED_FILENAMES = (
  'config.guess',
  'config.sub',
  'configure',
  'depcomp',
  'install-sh',
  'missing',
  'mkinstalldirs',
  'naclsdk',
  'scons',
)
170
# File paths starting with one of these will be ignored as well.
# Please consider fixing your file permissions, rather than adding to this list.
#
# Entries are matched as prefixes of the lower-cased path (see is_ignored()),
# so an entry ending with a slash ignores everything below that directory.
#
# Case-insensitive, lower-case only.
IGNORED_PATHS = (
  'native_client_sdk/src/build_tools/sdk_tools/third_party/fancy_urllib/'
      '__init__.py',
  'out/',
  # TODO(maruel): Fix these.
  'third_party/android_testrunner/',
  'third_party/bintrees/',
  'third_party/closure_linter/',
  'third_party/devscripts/licensecheck.pl.vanilla',
  'third_party/hyphen/',
  'third_party/jemalloc/',
  'third_party/lcov-1.9/contrib/galaxy/conglomerate_functions.pl',
  'third_party/lcov-1.9/contrib/galaxy/gen_makefile.sh',
  'third_party/lcov/contrib/galaxy/conglomerate_functions.pl',
  'third_party/lcov/contrib/galaxy/gen_makefile.sh',
  'third_party/libevent/autogen.sh',
  'third_party/libevent/test/test.sh',
  'third_party/libxml/linux/xml2-config',
  'third_party/libxml/src/ltmain.sh',
  'third_party/mesa/',
  'third_party/protobuf/',
  'third_party/python_gflags/gflags.py',
  'third_party/sqlite/',
  'third_party/talloc/script/mksyms.sh',
  'third_party/tcmalloc/',
  'third_party/tlslite/setup.py',
)
202
203#### USER EDITABLE SECTION ENDS HERE ####
204
# Sanity checks of the user editable lists, run when the module is loaded.
# Each assertion names the offending entries so a bad edit is easy to locate.

# An extension or a path cannot be both required and forbidden to be
# executable.
assert not set(EXECUTABLE_EXTENSIONS) & set(NON_EXECUTABLE_EXTENSIONS), (
    'Extensions listed as both executable and non-executable: %s' %
    sorted(set(EXECUTABLE_EXTENSIONS) & set(NON_EXECUTABLE_EXTENSIONS)))
assert not set(EXECUTABLE_PATHS) & set(NON_EXECUTABLE_PATHS), (
    'Paths listed as both executable and non-executable: %s' %
    sorted(set(EXECUTABLE_PATHS) & set(NON_EXECUTABLE_PATHS)))

# Paths are compared with lower-cased, slash-separated relative paths, so only
# these characters may appear in them.
VALID_CHARS = set(string.ascii_lowercase + string.digits + '/-_.')
for paths in (EXECUTABLE_PATHS, NON_EXECUTABLE_PATHS, IGNORED_PATHS):
  for path in paths:
    assert set(path).issubset(VALID_CHARS), (
        'Path contains characters outside of VALID_CHARS: %r' % path)

# is_ignored() compares these names with a lower-cased base name, so an entry
# with upper-case characters could never match.
for filename in IGNORED_FILENAMES:
  assert filename == filename.lower(), (
      'IGNORED_FILENAMES entries must be lower-case: %r' % filename)
211
212
def capture(cmd, cwd):
  """Returns the standard output of a command as text.

  The exit code and stderr are ignored. If the command cannot be started at
  all (for example the executable is not installed), an empty string is
  returned, exactly as if the command had printed nothing.

  Args:
    cmd: The command line to run, as a list of strings.
    cwd: The directory to run the command in.

  Returns:
    Whatever the command wrote to stdout, or '' if it could not be started.
  """
  logging.debug('%s; cwd=%s', ' '.join(cmd), cwd)
  env = os.environ.copy()
  # Presumably set so that the tools print messages in English, which the
  # callers parse (e.g. the 'URL' key of 'svn info').
  env['LANGUAGE'] = 'en_US.UTF-8'
  try:
    p = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=cwd,
        env=env,
        # Get text rather than bytes so callers can split and compare the
        # output with regular strings on every Python version.
        universal_newlines=True)
  except OSError as e:
    # Raised when the executable is missing; a checkout of one SCM should not
    # require the tools of the other one to be installed.
    logging.info('Failed to run %s: %s', cmd[0], e)
    return ''
  return p.communicate()[0]
224
225
def get_svn_info(dir_path):
  """Returns svn meta-data for a svn checkout.

  Args:
    dir_path: Directory in which 'svn info' is run.

  Returns:
    A dict mapping the key of each 'Key: value' line printed by 'svn info' to
    its value. The dict is empty when dir_path is not a directory or when svn
    printed nothing on stdout.
  """
  if not os.path.isdir(dir_path):
    return {}
  out = capture(['svn', 'info', '.', '--non-interactive'], dir_path)
  # Only keep lines in 'Key: value' form. Blank lines are dropped and any
  # other line would make dict() raise ValueError.
  return dict(
      line.split(': ', 1) for line in out.splitlines() if ': ' in line)
232
233
def get_svn_url(dir_path):
  """Returns the 'URL' value of 'svn info' for dir_path, or None if absent."""
  info = get_svn_info(dir_path)
  return info.get('URL')
236
237
def get_svn_root(dir_path):
  """Returns the svn checkout root or None.

  Walks up from dir_path for as long as the svn URL of the parent directory is
  the URL of the current directory minus its last component. The first
  directory whose parent does not match is the checkout root. None is returned
  when dir_path has no svn URL or when the file system root is reached.
  """
  current = dir_path
  url = get_svn_url(current)
  if not url:
    return None
  logging.info('svn url: %s', url)

  parent = os.path.dirname(current)
  # os.path.dirname() returns its argument unchanged at the file system root.
  while parent != current:
    url = url.rsplit('/', 1)[0]
    if get_svn_url(parent) != url:
      return current
    current = parent
    parent = os.path.dirname(current)
  return None
251    dir_path = parent
252
253
def get_git_root(dir_path):
  """Returns the git checkout root or None.

  The root is whatever 'git rev-parse --show-toplevel' prints when run in
  dir_path; empty output means no root was found.
  """
  output = capture(['git', 'rev-parse', '--show-toplevel'], dir_path)
  toplevel = output.strip()
  return toplevel if toplevel else None
258    return root
259
260
def is_ignored(rel_path):
  """Returns True if rel_path is in our whitelist of files to ignore.

  The path is lower-cased first; it is ignored when its base name is listed in
  IGNORED_FILENAMES or when it starts with one of the IGNORED_PATHS prefixes.
  """
  lowered = rel_path.lower()
  if os.path.basename(lowered) in IGNORED_FILENAMES:
    return True
  return lowered.startswith(IGNORED_PATHS)
267
268
def must_be_executable(rel_path):
  """Returns True if the file name denotes a file that must have the
  executable bit set.

  This is the case when its extension (compared as-is) is listed in
  EXECUTABLE_EXTENSIONS or when its lower-cased path is listed in
  EXECUTABLE_PATHS.
  """
  # splitext() keeps the leading dot in the extension; drop it.
  extension = os.path.splitext(rel_path)[1][1:]
  if extension in EXECUTABLE_EXTENSIONS:
    return True
  return rel_path.lower() in EXECUTABLE_PATHS
275
276
def must_not_be_executable(rel_path):
  """Returns True if the file name denotes a file that must not have the
  executable bit set.

  This is the case when its extension (compared as-is) is listed in
  NON_EXECUTABLE_EXTENSIONS or when its lower-cased path is listed in
  NON_EXECUTABLE_PATHS.
  """
  # splitext() keeps the leading dot in the extension; drop it.
  extension = os.path.splitext(rel_path)[1][1:]
  if extension in NON_EXECUTABLE_EXTENSIONS:
    return True
  return rel_path.lower() in NON_EXECUTABLE_PATHS
283
284
def has_executable_bit(full_path):
  """Returns True if the user, group or other executable bit is set.

  Raises OSError if full_path cannot be stat'ed.
  """
  any_executable = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
  mode = os.stat(full_path).st_mode
  return (mode & any_executable) != 0
289
290
def has_shebang_or_is_elf(full_path):
  """Returns whether the file starts with #!/ and whether it is an ELF binary.

  Args:
    full_path: The absolute path to the file.

  Returns:
    A (shebang, elf) tuple of booleans. shebang is True when the first three
    bytes are '#!/'; elf is True when the first four bytes are the ELF magic
    number.
  """
  with open(full_path, 'rb') as f:
    data = f.read(4)
  # The file is read in binary mode, so compare with bytes literals. Comparing
  # bytes with text literals is always False on Python 3.
  return (data[:3] == b'#!/', data == b'\x7fELF')
299
300
def check_file(root_path, rel_path, bare_output):
  """Checks the permissions of the file whose path is root_path + rel_path and
  returns an error if it is inconsistent.

  It is assumed that the file is not ignored by is_ignored().

  When the file name is matched by must_be_executable() or
  must_not_be_executable(), only the executable bit is looked at. For any
  other file the first bytes are read and the presence of a shebang or an ELF
  header must agree with the executable bit.

  Returns:
    None when the file is fine or cannot be stat'ed. Otherwise full_path alone
    when bare_output is set, else full_path followed by a description of the
    problem.
  """
  full_path = os.path.join(root_path, rel_path)
  try:
    bit = has_executable_bit(full_path)
  except OSError:
    # It's faster to catch exception than call os.path.islink(). Chromium
    # tree happens to have invalid symlinks under
    # third_party/openssl/openssl/test/.
    return None

  # Format of the error to report, if any; it takes full_path as argument.
  error_format = None
  if must_be_executable(rel_path):
    if not bit:
      error_format = '%s: Must have executable bit set'
  elif must_not_be_executable(rel_path):
    if bit:
      error_format = '%s: Must not have executable bit set'
  else:
    # For the others, it depends on the file header.
    shebang, elf = has_shebang_or_is_elf(full_path)
    if bit != (shebang or elf):
      if bit:
        error_format = '%s: Has executable bit but not shebang or ELF header'
      elif shebang:
        error_format = '%s: Has shebang but not executable bit'
      else:
        error_format = '%s: Has ELF header but not executable bit'

  if error_format is None:
    return None
  if bare_output:
    return full_path
  return error_format % full_path
344
345
def check_files(root, files, bare_output):
  """Checks each of the given files and returns the list of errors found.

  Args:
    root: Directory the entries of files are relative to.
    files: Paths of the files to check; the ones matched by is_ignored() are
        skipped.
    bare_output: Passed through to check_file().
  """
  # The generator is lazy, so each file is still tested with is_ignored()
  # right before it is checked.
  results = (
      check_file(root, rel_path, bare_output)
      for rel_path in files if not is_ignored(rel_path))
  return [error for error in results if error]
357
358
class ApiBase(object):
  """Checks the files found by walking the file system below a root directory.

  This class is not SCM aware; subclasses override check_dir(), list_dir() or
  both to restrict or replace how the files are enumerated.
  """
  def __init__(self, root_dir, bare_output):
    self.root_dir = root_dir
    self.bare_output = bare_output
    # Number of files passed to check_file().
    self.count = 0
    # Number of those files whose name alone did not decide the outcome.
    self.count_read_header = 0

  def check_file(self, rel_path):
    """Updates the counters, then checks the file at rel_path.

    Returns the error reported by the module level check_file(), if any.
    """
    logging.debug('check_file(%s)', rel_path)
    self.count += 1

    decided_by_name = (
        must_be_executable(rel_path) or must_not_be_executable(rel_path))
    if not decided_by_name:
      self.count_read_header += 1

    return check_file(self.root_dir, rel_path, self.bare_output)

  def check_dir(self, rel_path):
    """Checks a sub directory; by default it is simply recursed into."""
    return self.check(rel_path)

  def check(self, start_dir):
    """Check the files in start_dir, recursively check its subdirectories.

    Returns the list of errors found.
    """
    errors = []
    items = self.list_dir(start_dir)
    logging.info('check(%s) -> %d', start_dir, len(items))
    # Number of characters to drop to turn a full path into a relative one.
    prefix_length = len(self.root_dir) + 1
    for item in items:
      full_path = os.path.join(self.root_dir, start_dir, item)
      rel_path = full_path[prefix_length:]
      if is_ignored(rel_path):
        continue
      if not os.path.isdir(full_path):
        error = self.check_file(rel_path)
        if error:
          errors.append(error)
      else:
        # Depth first.
        errors.extend(self.check_dir(rel_path))
    return errors

  def list_dir(self, start_dir):
    """Lists all the files and directory inside start_dir.

    Entries whose name starts with a dot are left out; the result is sorted.
    """
    directory = os.path.join(self.root_dir, start_dir)
    visible = [
      name for name in os.listdir(directory) if not name.startswith('.')
    ]
    visible.sort()
    return visible
404
405
class ApiSvnQuick(ApiBase):
  """Returns all files in svn-versioned directories, whether or not the files
  themselves are versioned.

  Uses svn info in each directory to determine which directories should be
  crawled.
  """
  def __init__(self, *args):
    super(ApiSvnQuick, self).__init__(*args)
    # svn URL of the root directory; the URL of every crawled sub directory
    # must be this URL followed by the relative path of the sub directory.
    self.url = get_svn_url(self.root_dir)

  def check_dir(self, rel_path):
    """Checks rel_path only if its svn URL is the expected one."""
    expected_url = '/'.join((self.url, rel_path))
    actual_url = get_svn_url(os.path.join(self.root_dir, rel_path))
    if actual_url == expected_url:
      return super(ApiSvnQuick, self).check_dir(rel_path)
    return []
421    return super(ApiSvnQuick, self).check_dir(rel_path)
422
423
class ApiAllFilesAtOnceBase(ApiBase):
  """Base class of the checkers that get the whole list of files in one go.

  Subclasses implement _get_all_files(); it is called the first time a
  directory is listed and its sorted result is kept in self._files.
  """
  # Sorted list of all the files, None until list_dir() is first called.
  _files = None

  def list_dir(self, start_dir):
    """Lists all the files inside start_dir, at any depth.

    Args:
      start_dir: The directory to list, either as an absolute path or as a
          path relative to self.root_dir.

    Returns:
      The entries of the file list located below start_dir, as paths relative
      to start_dir. Everything is returned when start_dir is self.root_dir.
    """
    if self._files is None:
      self._files = sorted(self._get_all_files())
      if not self.bare_output:
        print('Found %s files' % len(self._files))

    # Express start_dir relative to the root, whichever form it was given in.
    # Slicing off len(root_dir) characters is only right for the root itself.
    rel_dir = os.path.normpath(start_dir)
    if os.path.isabs(rel_dir):
      rel_dir = os.path.relpath(rel_dir, self.root_dir)
    if rel_dir == os.path.curdir:
      prefix = ''
    else:
      # The trailing slash stops 'foo' from matching 'foobar/x', and keeps the
      # returned items free of a leading slash, which would make os.path.join()
      # in check() discard the directory they belong to.
      # NOTE(review): assumes the file list uses forward slashes -- confirm
      # for svn on Windows.
      prefix = rel_dir.replace(os.path.sep, '/') + '/'
    return [
      x[len(prefix):] for x in self._files if x.startswith(prefix)
    ]

  def _get_all_files(self):
    """Lists all the files inside self.root_dir; implemented by subclasses."""
    raise NotImplementedError()
440    raise NotImplementedError()
441
442
class ApiSvn(ApiAllFilesAtOnceBase):
  """Returns all the subversion controlled files.

  Warning: svn ls is abnormally slow.
  """
  def _get_all_files(self):
    """Returns the lines of a recursive 'svn ls' that are not directories."""
    listing = capture(
        ['svn', 'ls', '--non-interactive', '--recursive'], self.root_dir)
    # Entries ending with the path separator are taken to be directories.
    return (
        entry for entry in listing.splitlines()
        if not entry.endswith(os.path.sep))
453
454
class ApiGit(ApiAllFilesAtOnceBase):
  """Returns all the files listed by 'git ls-files'."""
  def _get_all_files(self):
    """Returns the output lines of 'git ls-files' run in the root directory."""
    listing = capture(['git', 'ls-files'], cwd=self.root_dir)
    return listing.splitlines()
458
459
def get_scm(dir_path, bare):
  """Returns a properly configured ApiBase instance.

  Subversion is probed first, then git; a checker that is not SCM aware is
  returned when neither is found. The current directory is used when dir_path
  is empty.

  Args:
    dir_path: Directory to check, or None.
    bare: When set, nothing is printed about the SCM that was detected.
  """
  cwd = os.getcwd()
  probe_dir = dir_path or cwd

  svn_root = get_svn_root(probe_dir)
  if svn_root:
    if not bare:
      print('Found subversion checkout at %s' % svn_root)
    return ApiSvnQuick(dir_path or svn_root, bare)

  git_root = get_git_root(probe_dir)
  if git_root:
    if not bare:
      print('Found git repository at %s' % git_root)
    return ApiGit(dir_path or git_root, bare)

  # Returns a non-scm aware checker.
  if not bare:
    print('Failed to determine the SCM for %s' % dir_path)
  return ApiBase(probe_dir, bare)
478
479
def main():
  """Parses the command line, runs the checks and prints the result.

  Returns:
    A value suitable for sys.exit(): false when no error was found, true
    otherwise.
  """
  usage = """Usage: python %prog [--root <root>] [tocheck]
  tocheck  Specifies the directory, relative to root, to check. This defaults
           to "." so it checks everything.

Examples:
  python %prog
  python %prog --root /path/to/source chrome"""

  parser = optparse.OptionParser(usage=usage)
  parser.add_option(
      '--root',
      help='Specifies the repository root. This defaults '
           'to the checkout repository root')
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Print debug logging')
  parser.add_option(
      '--bare',
      action='store_true',
      default=False,
      help='Prints the bare filename triggering the checks')
  parser.add_option(
      '--file', action='append', dest='files',
      help='Specifics a list of files to check the permissions of. Only these '
      'files will be checked')
  options, args = parser.parse_args()

  # Each -v selects the next, more verbose, level; extra ones are ignored.
  levels = [logging.ERROR, logging.INFO, logging.DEBUG]
  logging.basicConfig(level=levels[min(len(levels) - 1, options.verbose)])

  if len(args) > 1:
    parser.error('Too many arguments used')

  if options.root:
    options.root = os.path.abspath(options.root)

  if options.files:
    # check_files() joins every file name to the root, which fails on None;
    # resolve the names against the current directory when --root is omitted.
    root = options.root or os.getcwd()
    errors = check_files(root, options.files, options.bare)
    print('\n'.join(errors))
    return bool(errors)

  api = get_scm(options.root, options.bare)
  if args:
    start_dir = args[0]
  else:
    start_dir = api.root_dir

  errors = api.check(start_dir)

  if not options.bare:
    print('Processed %s files, %d files where tested for shebang/ELF header' % (
        api.count, api.count_read_header))

  if errors:
    if not options.bare:
      print('\nFAILED\n')
    print('\n'.join(errors))
    return 1
  if not options.bare:
    print('\nSUCCESS\n')
  return 0
541
542
# Only run the checks when executed as a script; the value returned by main()
# becomes the exit code.
if __name__ == '__main__':
  sys.exit(main())
545