1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Makes sure files have the right permissions.
7
8Some developers have broken SCM configurations that flip the executable
9permission on for no good reason. Unix developers who run ls --color will then
10see .cc files in green and get confused.
11
- For file extensions that must be executable, add it to EXECUTABLE_EXTENSIONS.
- For file extensions that must not be executable, add it to
  NON_EXECUTABLE_EXTENSIONS.
15- To ignore all the files inside a directory, add it to IGNORED_PATHS.
16- For file base name with ambiguous state and that should not be checked for
17  shebang, add it to IGNORED_FILENAMES.
18
Any file not matching the above will be opened and checked for a shebang or an
ELF header. If the result does not match the executable bit on the file, the
file will be flagged.
22
23Note that all directory separators must be slashes (Unix-style) and not
24backslashes. All directories should be relative to the source root and all
25file paths should be only lowercase.
26"""
27
28import json
29import logging
30import optparse
31import os
32import stat
33import string
34import subprocess
35import sys
36
37#### USER EDITABLE SECTION STARTS HERE ####
38
# Files with these extensions must have the executable bit set.
#
# Entries are written without the leading dot. must_be_executable() compares
# them against the extension returned by os.path.splitext() as-is, so the match
# is case-sensitive.
EXECUTABLE_EXTENSIONS = (
  'bat',
  'dll',
  'dylib',
  'exe',
)
48
# These files must have the executable bit set.
#
# must_be_executable() compares these entries against the lower-cased relative
# path, so the match is case-insensitive and every entry must be lower-case.
# Adjacent string literals below are concatenated into a single path.
EXECUTABLE_PATHS = (
  'chrome/test/data/app_shim/app_shim_32_bit.app/contents/'
      'macos/app_mode_loader',
  'chrome/test/data/extensions/uitest/plugins/plugin.plugin/contents/'
      'macos/testnetscapeplugin',
  'chrome/test/data/extensions/uitest/plugins_private/plugin.plugin/contents/'
      'macos/testnetscapeplugin',
)
60
# Files with these extensions must not have the executable bit set. This is
# mainly a performance optimization: check_file() decides on the name alone and
# never opens these files to look for a shebang. The list was partially
# generated from:
# git ls-files | grep "\\." | sed 's/.*\.//' | sort | uniq -c | sort -b -g
#
# Entries are written without the leading dot. must_not_be_executable()
# compares them against the extension returned by os.path.splitext() as-is, so
# the match is case-sensitive (note the upper-case 'S' entry).
NON_EXECUTABLE_EXTENSIONS = (
  '1',
  '3ds',
  'S',
  'am',
  'applescript',
  'asm',
  'c',
  'cc',
  'cfg',
  'chromium',
  'cpp',
  'crx',
  'cs',
  'css',
  'cur',
  'def',
  'der',
  'expected',
  'gif',
  'grd',
  'gyp',
  'gypi',
  'h',
  'hh',
  'htm',
  'html',
  'hyph',
  'ico',
  'idl',
  'java',
  'jpg',
  'js',
  'json',
  'm',
  'm4',
  'mm',
  'mms',
  'mock-http-headers',
  'nexe',
  'nmf',
  'onc',
  'pat',
  'patch',
  'pdf',
  'pem',
  'plist',
  'png',
  'proto',
  'rc',
  'rfx',
  'rgs',
  'rules',
  'spec',
  'sql',
  'srpc',
  'svg',
  'tcl',
  'test',
  'tga',
  'txt',
  'vcproj',
  'vsprops',
  'webm',
  'word',
  'xib',
  'xml',
  'xtb',
  'zip',
)
137
# These files must not have the executable bit set.
#
# must_not_be_executable() compares these entries against the lower-cased
# relative path, so the match is case-insensitive and every entry must be
# lower-case. Adjacent string literals below are concatenated into a single
# path.
NON_EXECUTABLE_PATHS = (
  'build/android/tests/symbolize/liba.so',
  'build/android/tests/symbolize/libb.so',
  'chrome/installer/mac/sign_app.sh.in',
  'chrome/installer/mac/sign_versioned_dir.sh.in',
  'chrome/test/data/extensions/uitest/plugins/plugin32.so',
  'chrome/test/data/extensions/uitest/plugins/plugin64.so',
  'chrome/test/data/extensions/uitest/plugins_private/plugin32.so',
  'chrome/test/data/extensions/uitest/plugins_private/plugin64.so',
  'components/test/data/component_updater/ihfokbkgjpifnbbojhneepfflplebdkc/'
      'ihfokbkgjpifnbbojhneepfflplebdkc_1/a_changing_binary_file',
  'components/test/data/component_updater/ihfokbkgjpifnbbojhneepfflplebdkc/'
      'ihfokbkgjpifnbbojhneepfflplebdkc_2/a_changing_binary_file',
  'courgette/testdata/elf-32-1',
  'courgette/testdata/elf-32-2',
  'courgette/testdata/elf-64',
)
158
# File names that are always whitelisted.  (These are mostly autoconf spew.)
#
# Case-sensitive.
#
# NOTE(review): is_ignored() lower-cases the whole path before taking its base
# name, so in practice the lookup is case-insensitive and only lower-case
# entries can ever match. Keep new entries lower-case.
IGNORED_FILENAMES = (
  'config.guess',
  'config.sub',
  'configure',
  'depcomp',
  'install-sh',
  'missing',
  'mkinstalldirs',
  'naclsdk',
  'scons',
)
173
# File paths starting with one of these will be ignored as well.
# Please consider fixing your file permissions, rather than adding to this list.
#
# is_ignored() passes this tuple to str.startswith() on the lower-cased
# relative path, so each entry is a prefix: an entry ending in '/' ignores
# everything below that directory, and every entry must be lower-case.
# Adjacent string literals below are concatenated into a single path.
IGNORED_PATHS = (
  'native_client_sdk/src/build_tools/sdk_tools/third_party/fancy_urllib/'
      '__init__.py',
  'out/',
  # TODO(maruel): Fix these.
  'third_party/android_testrunner/',
  'third_party/bintrees/',
  'third_party/closure_linter/',
  'third_party/devscripts/licensecheck.pl.vanilla',
  'third_party/hyphen/',
  'third_party/jemalloc/',
  'third_party/lcov-1.9/contrib/galaxy/conglomerate_functions.pl',
  'third_party/lcov-1.9/contrib/galaxy/gen_makefile.sh',
  'third_party/lcov/contrib/galaxy/conglomerate_functions.pl',
  'third_party/lcov/contrib/galaxy/gen_makefile.sh',
  'third_party/libevent/autogen.sh',
  'third_party/libevent/test/test.sh',
  'third_party/libxml/linux/xml2-config',
  'third_party/libxml/src/ltmain.sh',
  'third_party/mesa/',
  'third_party/protobuf/',
  'third_party/python_gflags/gflags.py',
  'third_party/sqlite/',
  'third_party/talloc/script/mksyms.sh',
  'third_party/tcmalloc/',
  'third_party/tlslite/setup.py',
)
205
206#### USER EDITABLE SECTION ENDS HERE ####
207
# Sanity checks on the user editable section: nothing may be listed as both
# executable and non-executable.
assert set(EXECUTABLE_EXTENSIONS).isdisjoint(NON_EXECUTABLE_EXTENSIONS)
assert set(EXECUTABLE_PATHS).isdisjoint(NON_EXECUTABLE_PATHS)

# Path entries may only contain lower-case letters, digits and '/', '-', '_'
# or '.'; this enforces the "slashes only, lower-case only" rule.
VALID_CHARS = set(string.ascii_lowercase + string.digits + '/-_.')
for paths in (EXECUTABLE_PATHS, NON_EXECUTABLE_PATHS, IGNORED_PATHS):
  for path in paths:
    assert VALID_CHARS.issuperset(path)
214
215
def capture(cmd, cwd):
  """Runs a command and returns what it wrote to stdout, as text.

  The exit code and stderr of the command are ignored.

  Args:
    cmd: The command line, as a list of strings.
    cwd: The directory to run the command in.

  Returns:
    The captured stdout as a text string. An empty string is returned when the
    command could not be started at all (e.g. the executable is not installed
    or cwd does not exist), i.e. the same as a command that printed nothing.
  """
  logging.debug('%s; cwd=%s', ' '.join(cmd), cwd)
  env = os.environ.copy()
  env['LANGUAGE'] = 'en_US.UTF-8'
  try:
    # universal_newlines=True makes stdout a text string on both Python 2 and
    # Python 3, so callers can mix the result with regular str paths.
    p = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, env=env,
        universal_newlines=True)
  except OSError as e:
    # Failing to launch is treated like any other failure of the command: it
    # is ignored and reported as "no output".
    logging.debug('Failed to run %s: %s', ' '.join(cmd), e)
    return ''
  return p.communicate()[0]
227
228
def get_git_root(dir_path):
  """Finds the top-level directory of the git checkout containing dir_path.

  Returns the stripped output of `git rev-parse --show-toplevel` run inside
  dir_path, or None when that command printed nothing.
  """
  toplevel = capture(['git', 'rev-parse', '--show-toplevel'], dir_path)
  toplevel = toplevel.strip()
  return toplevel if toplevel else None
234
235
def is_ignored(rel_path):
  """Tells whether rel_path is whitelisted and must be skipped.

  A path is ignored when its base name is listed in IGNORED_FILENAMES or when
  it starts with one of the IGNORED_PATHS prefixes. Both tests are done on the
  lower-cased path.
  """
  lowered = rel_path.lower()
  if os.path.basename(lowered) in IGNORED_FILENAMES:
    return True
  # str.startswith() accepts a tuple of prefixes.
  return lowered.startswith(IGNORED_PATHS)
242
243
def must_be_executable(rel_path):
  """Tells whether the file name alone requires the executable bit to be set.

  True when the extension (case-sensitive, without the dot) is listed in
  EXECUTABLE_EXTENSIONS or when the lower-cased path is listed in
  EXECUTABLE_PATHS.
  """
  extension = os.path.splitext(rel_path)[1][1:]
  if extension in EXECUTABLE_EXTENSIONS:
    return True
  return rel_path.lower() in EXECUTABLE_PATHS
250
251
def must_not_be_executable(rel_path):
  """Tells whether the file name alone forbids the executable bit.

  True when the extension (case-sensitive, without the dot) is listed in
  NON_EXECUTABLE_EXTENSIONS or when the lower-cased path is listed in
  NON_EXECUTABLE_PATHS.
  """
  extension = os.path.splitext(rel_path)[1][1:]
  if extension in NON_EXECUTABLE_EXTENSIONS:
    return True
  return rel_path.lower() in NON_EXECUTABLE_PATHS
258
259
def has_executable_bit(full_path):
  """Returns True if the user, group or other executable bit is set.

  Raises OSError if full_path cannot be stat'ed.
  """
  mode = os.stat(full_path).st_mode
  any_executable = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
  return (mode & any_executable) != 0
264
265
def has_shebang_or_is_elf(full_path):
  """Inspects the first 4 bytes of a file for a shebang or an ELF header.

  Args:
    full_path: The absolute path to the file.

  Returns:
    A (shebang, elf) tuple of booleans. shebang is True when the file starts
    with '#!/' or '#! /'; elf is True when it starts with the ELF magic number
    '\\x7fELF'.
  """
  with open(full_path, 'rb') as f:
    data = f.read(4)
  # The file is read in binary mode, so compare against bytes literals; str
  # literals would never compare equal to the data under Python 3.
  shebang = data[:3] == b'#!/' or data == b'#! /'
  elf = data == b'\x7fELF'
  return (shebang, elf)
274
275
def check_file(root_path, rel_path):
  """Verifies the executable bit of the file at root_path + rel_path.

  The caller is expected to have filtered the file with is_ignored() first.

  When must_be_executable() or must_not_be_executable() matches the file name,
  only the executable bit is looked at. Otherwise the first few bytes of the
  file are read and the presence of a shebang or ELF header is compared with
  the executable bit.

  Returns:
    None when the file is consistent (or cannot be stat'ed), otherwise a dict
    with the keys 'error', 'full_path' and 'rel_path'.
  """
  full_path = os.path.join(root_path, rel_path)

  def make_error(message):
    return {
      'error': message,
      'full_path': full_path,
      'rel_path': rel_path,
    }

  try:
    executable = has_executable_bit(full_path)
  except OSError:
    # It's faster to catch exception than call os.path.islink(). Chromium
    # tree happens to have invalid symlinks under
    # third_party/openssl/openssl/test/.
    return None

  # File names for which the expected state is known up front.
  if must_be_executable(rel_path):
    if executable:
      return None
    return make_error('Must have executable bit set')
  if must_not_be_executable(rel_path):
    if executable:
      return make_error('Must not have executable bit set')
    return None

  # For the others, it depends on the file header.
  shebang, elf = has_shebang_or_is_elf(full_path)
  if executable == (shebang or elf):
    return None
  if executable:
    return make_error('Has executable bit but not shebang or ELF header')
  if shebang:
    return make_error('Has shebang but not executable bit')
  return make_error('Has ELF header but not executable bit')
319
320
def check_files(root, files):
  """Checks an explicit list of files.

  Args:
    root: The directory the entries of files are relative to.
    files: An iterable of relative file paths. Paths matched by is_ignored()
        are skipped.

  Returns:
    A list with the error dict returned by check_file() for each inconsistent
    file. A real list (rather than a lazy filter object) is returned so that
    the result can be tested for emptiness and serialized to JSON.
  """
  errors = []
  for rel_path in files:
    if is_ignored(rel_path):
      continue
    error = check_file(root, rel_path)
    if error:
      errors.append(error)
  return errors
324
325
class ApiBase(object):
  """Checks file permissions by walking the directory tree on disk."""

  def __init__(self, root_dir, bare_output):
    self.root_dir = root_dir
    self.bare_output = bare_output
    # Number of files handed to check_file().
    self.count = 0
    # Number of those files whose name matched neither must_be_executable()
    # nor must_not_be_executable().
    self.count_read_header = 0

  def check_file(self, rel_path):
    """Updates the counters and checks a single file.

    Returns what the module-level check_file() returns for rel_path.
    """
    logging.debug('check_file(%s)', rel_path)
    self.count += 1

    decided_by_name = (
        must_be_executable(rel_path) or must_not_be_executable(rel_path))
    if not decided_by_name:
      self.count_read_header += 1

    return check_file(self.root_dir, rel_path)

  def check_dir(self, rel_path):
    """Checks a sub directory; simply forwards to check()."""
    return self.check(rel_path)

  def check(self, start_dir):
    """Check the files in start_dir, recursively check its subdirectories.

    Returns the list of errors found.
    """
    entries = self.list_dir(start_dir)
    logging.info('check(%s) -> %d', start_dir, len(entries))
    # Length of the root prefix plus one separator, stripped from full paths
    # to obtain paths relative to root_dir.
    prefix_len = len(self.root_dir) + 1
    found = []
    for entry in entries:
      full_path = os.path.join(self.root_dir, start_dir, entry)
      rel_path = full_path[prefix_len:]
      if is_ignored(rel_path):
        continue
      if not os.path.isdir(full_path):
        error = self.check_file(rel_path)
        if error:
          found.append(error)
        continue
      # Depth first.
      found.extend(self.check_dir(rel_path))
    return found

  def list_dir(self, start_dir):
    """Lists all the files and directory inside start_dir.

    Entries whose name starts with '.' are left out; the result is sorted.
    """
    visible = [
      name for name in os.listdir(os.path.join(self.root_dir, start_dir))
      if not name.startswith('.')
    ]
    visible.sort()
    return visible
371
372
class ApiAllFilesAtOnceBase(ApiBase):
  """Base class for checkers that get the whole file list in a single query.

  The list returned by _get_all_files() is fetched on first use and cached in
  self._files.
  """
  # Sorted list of all the file paths; None until list_dir() is first called.
  _files = None

  def list_dir(self, start_dir):
    """Lists all the files found below start_dir.

    Args:
      start_dir: Either an absolute path located under self.root_dir or a path
          relative to self.root_dir.

    Returns:
      The list of known file paths located below start_dir, each relative to
      start_dir. Unlike ApiBase.list_dir(), the entries are complete relative
      file paths rather than the names of the direct children.
    """
    if self._files is None:
      self._files = sorted(self._get_all_files())
      if not self.bare_output:
        print('Found %s files' % len(self._files))
    if os.path.isabs(start_dir):
      start_dir = os.path.relpath(start_dir, self.root_dir)
    # normpath() drops any trailing separator and maps '' to '.'.
    rel_dir = os.path.normpath(start_dir)
    if rel_dir == os.curdir:
      return list(self._files)
    # The file list uses '/' as separator. The trailing slash makes sure that
    # only files inside the directory match (and not e.g. 'foo_bar/x' for
    # 'foo'), and that the returned entries have no leading separator, which
    # os.path.join() would otherwise take for an absolute path.
    prefix = rel_dir.replace(os.sep, '/') + '/'
    return [
      x[len(prefix):] for x in self._files if x.startswith(prefix)
    ]

  def _get_all_files(self):
    """Lists all the files inside self.root_dir.

    Must be implemented by subclasses. list_dir() expects an iterable of file
    paths relative to self.root_dir that use '/' as directory separator.
    """
    raise NotImplementedError()
390
391
class ApiGit(ApiAllFilesAtOnceBase):
  """Checker that takes its file list from `git ls-files`."""

  def _get_all_files(self):
    """Returns the lines printed by `git ls-files` run inside self.root_dir."""
    listing = capture(['git', 'ls-files'], cwd=self.root_dir)
    return listing.splitlines()
395
396
def get_scm(dir_path, bare):
  """Builds the checker matching the checkout found at dir_path.

  Args:
    dir_path: The directory to check; the current directory is used when it is
        empty or None.
    bare: When true, no informational message is printed.

  Returns:
    An ApiGit instance when a git checkout is detected, otherwise a plain
    ApiBase instance that is not SCM aware.
  """
  cwd = os.getcwd()
  root = get_git_root(dir_path or cwd)
  if not root:
    # Returns a non-scm aware checker.
    if not bare:
      print('Failed to determine the SCM for %s' % dir_path)
    return ApiBase(dir_path or cwd, bare)

  if not bare:
    print('Found git repository at %s' % root)
  return ApiGit(dir_path or root, bare)
410
411
def main():
  """Parses the command line, runs the permission checks and reports errors.

  Either the files given with --file are checked, or the whole tree below the
  optional `tocheck` argument is walked. Errors are printed to stdout and, when
  --json is given, also written to that file as JSON.

  Returns:
    The process exit code: 1 when at least one file has inconsistent
    permissions, 0 otherwise.
  """
  usage = """Usage: python %prog [--root <root>] [tocheck]
  tocheck  Specifies the directory, relative to root, to check. This defaults
           to "." so it checks everything.

Examples:
  python %prog
  python %prog --root /path/to/source chrome"""

  parser = optparse.OptionParser(usage=usage)
  parser.add_option(
      '--root',
      help='Specifies the repository root. This defaults '
           'to the checkout repository root')
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Print debug logging')
  parser.add_option(
      '--bare',
      action='store_true',
      default=False,
      help='Prints the bare filename triggering the checks')
  parser.add_option(
      '--file', action='append', dest='files',
      help='Specifies a list of files to check the permissions of. Only these '
      'files will be checked')
  parser.add_option('--json', help='Path to JSON output file')
  options, args = parser.parse_args()

  # Each -v raises the verbosity by one level, capped at DEBUG.
  levels = [logging.ERROR, logging.INFO, logging.DEBUG]
  logging.basicConfig(level=levels[min(len(levels) - 1, options.verbose)])

  if len(args) > 1:
    parser.error('Too many arguments used')

  if options.root:
    options.root = os.path.abspath(options.root)

  if options.files:
    # --file implies --bare (for PRESUBMIT.py).
    options.bare = True

    # Without --root, default to the checkout root as documented in the help
    # (falling back to the current directory); check_file() cannot join a
    # None root with the file paths.
    root = options.root
    if not root:
      cwd = os.getcwd()
      root = get_git_root(cwd) or cwd
    errors = check_files(root, options.files)
  else:
    api = get_scm(options.root, options.bare)
    start_dir = args[0] if args else api.root_dir
    errors = api.check(start_dir)

    if not options.bare:
      print('Processed %s files, %d files were tested for shebang/ELF '
            'header' % (api.count, api.count_read_header))

  # Materialize the errors so that both the truth test and json.dump() below
  # work even if a lazy iterable was returned.
  errors = list(errors)

  if options.json:
    with open(options.json, 'w') as f:
      json.dump(errors, f)

  if errors:
    if options.bare:
      print('\n'.join(e['full_path'] for e in errors))
    else:
      print('\nFAILED\n')
      print('\n'.join('%s: %s' % (e['full_path'], e['error']) for e in errors))
    return 1
  if not options.bare:
    print('\nSUCCESS\n')
  return 0
477
478
# Script entry point: the value returned by main() becomes the exit code.
if __name__ == '__main__':
  exit_code = main()
  sys.exit(exit_code)
481