1#!/usr/bin/python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Checks third-party licenses for the purposes of the Android WebView build.
7
8The Android tree includes a snapshot of Chromium in order to power the system
9WebView.  This tool checks that all code uses open-source licenses compatible
10with Android, and that we meet the requirements of those licenses. It can also
11be used to generate an Android NOTICE file for the third-party code.
12
13It makes use of src/tools/licenses.py and the README.chromium files on which
14it depends. It also makes use of a data file, third_party_files_whitelist.txt,
which whitelists individual files which contain third-party code but which
16aren't in a third-party directory with a README.chromium file.
17"""
18
19import optparse
20import os
21import re
22import subprocess
23import sys
24import textwrap
25
26
27REPOSITORY_ROOT = os.path.abspath(os.path.join(
28    os.path.dirname(__file__), '..', '..'))
29
30sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
31import licenses
32
33import known_issues
34
def GetIncompatibleDirectories():
  """Gets a list of third-party directories which use licenses incompatible
  with Android. This is used by the snapshot tool.

  A directory is reported when it is listed in known_issues.KNOWN_ISSUES, or
  when the metadata returned by licenses.ParseDir() does not set
  'License Android Compatible' to 'yes' and at least one of the license names
  in its 'License' field fails to match the whitelist below.

  Returns:
    A list of directories.
  Raises:
    licenses.LicenseError: re-raised from licenses.ParseDir() after the
      offending directory has been printed.
  """

  whitelist = [
    r'Apache( Version)? 2(\.0)?',
    r'(New )?([23]-Clause )?BSD( [23]-Clause)?( with advertising clause)?',
    r'L?GPL ?v?2(\.[01])?( or later)?',
    r'MIT(/X11)?(-like)?',
    r'MPL 1\.1 ?/ ?GPL 2(\.0)? ?/ ?LGPL 2\.1',
    r'MPL 2(\.0)?',
    r'Microsoft Limited Public License',
    r'Microsoft Permissive License',
    r'Public Domain',
    r'SGI Free Software License B',
    r'X11',
  ]
  # Compile once; the same pattern is applied to every token of every
  # directory.
  regex = re.compile('^(%s)$' % '|'.join(whitelist), re.IGNORECASE)
  result = []
  for directory in _FindThirdPartyDirs():
    if directory in known_issues.KNOWN_ISSUES:
      result.append(directory)
      continue
    try:
      metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                   require_license_file=False)
    except licenses.LicenseError:
      print('Got LicenseError while scanning ' + directory)
      raise
    if metadata.get('License Android Compatible', 'no').upper() == 'YES':
      continue
    # Drop a trailing ' license' / ' licenses' word so that only the license
    # names themselves are matched against the whitelist.
    license_names = re.split(' [Ll]icenses?$', metadata['License'])[0]
    # Strip each fragment *before* discarding empty ones. Otherwise a
    # whitespace-only fragment (e.g. from 'MIT,  and BSD') would survive the
    # filter, become '' and wrongly mark the directory as incompatible.
    tokens = [t.strip() for t in re.split(' and |,', license_names)]
    tokens = [t for t in tokens if t]
    if not all(regex.match(t) for t in tokens):
      result.append(directory)
  return result
76
class ScanResult(object):
  """Result codes of a scan; main() returns them and they are handed to
  sys.exit(), so Ok maps to a zero exit status."""
  Ok = 0
  Warnings = 1
  Errors = 2
79
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that every file carrying a non-Chromium copyright notice, and
  living outside the excluded directories, appears in the whitelist.

  The copyright notices are collected by running
  android_webview/tools/find_copyrights.pl from the repository root. Each
  output line is split on a tab into a file path and a ' / '-separated list
  of copyright strings.

  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
    whitelisted_files: The whitelist of files.
  Returns:
    ScanResult.Ok if all files with non-standard license headers are whitelisted
    and the whitelist contains no stale entries;
    ScanResult.Warnings if there are stale entries;
    ScanResult.Errors if new non-whitelisted entries found.
  """

  # Using a common pattern for third-parties makes the ignore regexp shorter:
  # individual third-party directories are dropped and replaced by the single
  # 'third_party' entry below.
  dirs_to_skip = [d for d in excluded_dirs_list if 'third_party' not in d]
  dirs_to_skip.extend([
    'third_party',
    # VCS dirs
    '.git',
    '.svn',
    # Build output
    'out/Debug',
    'out/Release',
    # 'Copyright' appears in license agreements
    'chrome/app/resources',
    # This is a test output directory
    'chrome/tools/test/reference_build',
    # This is a tests directory, doesn't exist in the snapshot
    'content/test/data',
    # This is a tests directory that doesn't exist in the shipped product.
    'gin/test',
    # This is a test output directory
    'data/dom_perf',
    # Histogram tools, doesn't exist in the snapshot
    'tools/histograms',
    # Swarming tools, doesn't exist in the snapshot
    'tools/swarming_client',
    # Arm sysroot tools, doesn't exist in the snapshot
    'arm-sysroot',
    # Data is not part of open source chromium, but are included on some bots.
    'data',
  ])

  scanner = subprocess.Popen(
      args=['android_webview/tools/find_copyrights.pl', '.'] + dirs_to_skip,
      cwd=REPOSITORY_ROOT,
      stdout=subprocess.PIPE)
  scanner_output = scanner.communicate()[0]

  # Copyright strings that do not count as third-party.
  allowed_copyrights_re = re.compile(
      r'^(?:\*No copyright\*'
      r'|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
      r'All rights reserved.*)$')

  offending_files = set()
  for output_line in scanner_output.splitlines():
    fields = output_line.split('\t')
    copyright_field = fields[1]
    if copyright_field == "GENERATED FILE":
      continue
    # One non-empty, non-allowed copyright string is enough to flag the file.
    has_foreign_copyright = any(
        notice and not allowed_copyrights_re.match(notice)
        for notice in copyright_field.split(' / '))
    if has_foreign_copyright:
      offending_files.add(os.path.normpath(fields[0]))

  whitelisted = set(whitelisted_files)
  unknown = offending_files - whitelisted
  stale = whitelisted - offending_files

  if unknown:
    print('The following files contain a third-party license but are not in '
          'a listed third-party directory and are not whitelisted. You must '
          'add the following files to the whitelist.\n%s' %
          '\n'.join(sorted(unknown)))
  if stale:
    print('The following files are whitelisted unnecessarily. You must '
          ' remove the following files from the whitelist.\n%s' %
          '\n'.join(sorted(stale)))

  # Unknown files are fatal; stale whitelist entries only warrant a warning.
  if unknown:
    return ScanResult.Errors
  if stale:
    return ScanResult.Warnings
  return ScanResult.Ok
161
162
def _ReadFile(path):
  """Reads a file from disk.
  Args:
    path: The path of the file to read, relative to the root of the repository.
  Returns:
    The contents of the file, read in binary mode.
  """

  # Use a context manager so the file handle is closed as soon as the read
  # finishes rather than whenever the file object is garbage collected.
  with open(os.path.join(REPOSITORY_ROOT, path), 'rb') as f:
    return f.read()
172
173
def _FindThirdPartyDirs():
  """Collects the third-party directories the license checks operate on.

  Returns:
    The directories found by licenses.FindThirdPartyDirs() with the paths
    listed below pruned, passed through licenses.FilterDirsWithFiles().
  """

  # Please don't add here paths that have problems with license files,
  # as they will end up included in Android WebView snapshot.
  # Instead, add them into known_issues.py.
  pruned_path_parts = [
    # Placeholder directory, no third-party code.
    ('third_party', 'adobe'),
    # Apache 2.0 license. See
    # https://code.google.com/p/chromium/issues/detail?id=140478.
    ('third_party', 'bidichecker'),
    # Isn't checked out on clients
    ('third_party', 'gles2_conform'),
    # The llvm-build doesn't exist for non-clang builder
    ('third_party', 'llvm-build'),
    # Binaries doesn't apply to android
    ('third_party', 'widevine'),
    # third_party directories in this tree aren't actually third party, but
    # provide a way to shadow experimental buildfiles into those directories.
    ('tools', 'gn', 'secondary'),
    # Not shipped, Chromium code
    ('tools', 'swarming_client'),
  ]
  # Join the components with the platform's path separator.
  prune_paths = [os.path.join(*parts) for parts in pruned_path_parts]
  candidate_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(candidate_dirs, REPOSITORY_ROOT)
203
204
def _Scan():
  """Checks that license meta-data is present for all third-party code and
     that all non third-party code doesn't contain external copyrighted code.
  Returns:
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
      entries)
    ScanResult.Errors otherwise.
  """

  third_party_dirs = _FindThirdPartyDirs()

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      # Directories listed in known_issues are allowed to fail this check.
      if path not in known_issues.KNOWN_ISSUES:
        print('Got LicenseError "%s" while scanning %s' % (e, path))
        all_licenses_valid = False

  # Second, check for non-standard license text.
  files_data = _ReadFile(os.path.join('android_webview', 'tools',
                                      'third_party_files_whitelist.txt'))
  # An entry is the leading run of characters on a line that are neither
  # whitespace nor '#'; lines that start with either contribute nothing.
  entry_re = re.compile(r'([^#\s]+)')
  matches = (entry_re.match(line) for line in files_data.splitlines())
  whitelisted_files = [m.group(1) for m in matches if m]
  licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)

  # A license metadata failure is always an error, whatever the header check
  # returned.
  return licenses_check if all_licenses_valid else ScanResult.Errors
238
239
def GenerateNoticeFile():
  """Builds the text of an Android NOTICE file covering the third-party code.
  This is used by the snapshot tool.

  Returns:
    Chromium's LICENSE file followed by the license file of each third-party
    directory (in sorted order), joined with newlines.
  """

  directories = sorted(_FindThirdPartyDirs())

  # Don't forget Chromium's LICENSE file
  notice_parts = [_ReadFile('LICENSE')]

  # We provide attribution for all third-party directories.
  # TODO(steveblock): Limit this to only code used by the WebView binary.
  for third_party_dir in directories:
    dir_metadata = licenses.ParseDir(third_party_dir, REPOSITORY_ROOT,
                                     require_license_file=False)
    license_path = dir_metadata['License File']
    # Skip directories with no license file or marked as not shipped.
    if not license_path or license_path == licenses.NOT_SHIPPED:
      continue
    notice_parts.append(_ReadFile(license_path))

  return '\n'.join(notice_parts)
262
263
def main():
  """Parses the command line and runs the requested command.

  Returns:
    The ScanResult of the scan for 'scan'; ScanResult.Ok after printing the
    NOTICE contents for 'notice'; ScanResult.Errors (after printing the help)
    when the argument count is not one or the command is unknown.
  """
  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    # Wraps each line of the description separately so that the newlines in
    # the description survive formatting.
    def format_description(self, description):
      wrapped = [textwrap.fill(paragraph, self.width)
                 for paragraph in description.split('\n')]
      return '\n'.join(wrapped) + '\n'

  commands_help = ('\nCommands:\n'
                   '  scan  Check licenses.\n'
                   '  notice Generate Android NOTICE file on stdout')
  parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                 usage='%prog [options]')
  parser.description = __doc__ + commands_help
  _, args = parser.parse_args()

  if len(args) == 1:
    command = args[0]
    if command == 'scan':
      scan_result = _Scan()
      if scan_result == ScanResult.Ok:
        print('OK!')
      return scan_result
    if command == 'notice':
      print(GenerateNoticeFile())
      return ScanResult.Ok

  # Wrong number of arguments or an unrecognised command.
  parser.print_help()
  return ScanResult.Errors
293
if __name__ == '__main__':
  # The value returned by main() becomes the process exit status.
  exit_status = main()
  sys.exit(exit_status)
296