1#!/usr/bin/env python
2# Copyright (c) 2015 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6
7"""Parse an LLVM coverage report to generate useable results."""
8
9
10import argparse
11import json
12import os
13import re
14import subprocess
15import sys
16
17
def _fix_filename(filename):
  """Reduce a path printed by llvm-cov to a suffix usable for matching.

  llvm-cov prints paths which climb out of the build directory, e.g.:

      /path/to/repo/out/dir/../../src/filename.cpp

  and shortens long ones by replacing their beginning with dots, e.g.:

      ...../../src/filename.cpp

  Neither form says directly whether the file lives in this repo.  Everything
  up to and including the last '..' is dropped here, along with any '.' and
  '/' characters left at the front of the remainder.  If the file is in the
  repo, the result should be the tail end of its repo-relative path.  That is
  a heuristic rather than a guarantee.
  """
  pieces = filename.split('..')
  tail = pieces[-1]
  # lstrip treats its argument as a set of characters, so this removes any
  # mix of leading dots and slashes.
  return tail.lstrip('./')
36
37
def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  Use suffix matching to determine which checked-in files the given filename
  matches. If there are no matches or multiple matches, return None.

  Args:
    filename: a (possibly truncated) file path taken from the coverage report.
    all_files: iterable of candidate file paths to match against.

  Returns:
    The single element of all_files which ends with the cleaned-up filename,
    or None if there is no such element or more than one. In the ambiguous
    case a warning listing the candidates is written to stderr.
  """
  new_file = _fix_filename(filename)
  if not new_file:
    # Every path ends with the empty string, so an empty name cannot identify
    # any particular file.
    return None

  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  if matched:
    # sys.stderr.write works identically on Python 2 and Python 3, unlike the
    # "print >> stream" statement form.
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None
55
56
def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  The report is read as a series of sections, each introduced by a
  "<filename>:" header line and followed by data lines of the form
  "<count>|<lineno>|<code>". A section is kept only if its header can be
  matched to exactly one file found under the current working directory.

  Args:
    report: the full text of the coverage report.

  Returns:
    A dict mapping file paths (relative to the current working directory) to
    lists of (lineno, coverage, code) tuples. coverage is an int, or None for
    lines whose count column is empty.

  Raises:
    ValueError: if a data line has fewer than two '|' separators, or a
        non-numeric count or line number.
    AssertionError: if the line numbers within a section are not consecutive
        starting from 1.
  """
  # Gather the relative paths of all candidate files once, up front.
  cwd = os.getcwd()
  all_files = []
  for root, dirs, filenames in os.walk(cwd):
    if 'third_party/externals' in root:
      # Every directory below this one would be skipped by the same test, so
      # there is no point in descending into it.
      dirs[:] = []
      continue
    # Prune hidden directories in place so that os.walk does not visit them.
    dirs[:] = [d for d in dirs if not d.startswith('.')]
    rel_root = root[len(cwd) + 1:]
    for name in filenames:
      if name.startswith('.') or name.endswith('.pyc'):
        continue
      all_files.append(os.path.join(rel_root, name))
  all_files.sort()

  header_re = re.compile(r'([a-zA-Z0-9\./_-]+):')
  # Separator rows made of dashes and rows starting with an indented '|' carry
  # no per-line data.
  skip_re = re.compile(r'^\s{2}-+$|^\s{2}\|.+$')
  not_checked_in = '%' # Use this as the file name for not-checked-in files.

  coverage = {}
  current_file = None
  file_lines = []
  for line in report.splitlines():
    header = header_re.match(line)
    if header:
      # A new section begins; store the one which just ended.
      if current_file and current_file != not_checked_in:
        coverage[current_file] = file_lines
      match_filename = _file_in_repo(header.group(1), all_files)
      current_file = match_filename or not_checked_in
      file_lines = []
      continue

    # Lines seen before any header, or inside a section for a file which is
    # not checked in, cannot be attributed to anything we report.
    if current_file is None or current_file == not_checked_in:
      continue
    if not line or skip_re.match(line):
      continue

    cov, linenum, code = line.split('|', 2)
    cov = cov.strip()
    # An empty count column means we don't care about coverage for this line.
    cov = int(cov) if cov else None
    linenum = int(linenum.strip())
    assert linenum == len(file_lines) + 1, (
        'unexpected line number %d in %s' % (linenum, current_file))
    if isinstance(code, bytes):
      # Only byte strings need decoding; text strings are used as they are.
      code = code.decode('utf-8', 'replace')
    file_lines.append((linenum, cov, code))

  # The final section has no following header to trigger the store above.
  if current_file and current_file != not_checked_in:
    coverage[current_file] = file_lines
  return coverage
99
100
101
def _testname(filename):
  """Return filename with every non-ASCII-alphanumeric character made '_'."""
  non_alnum = re.compile(r'[^a-zA-Z0-9]')
  return non_alnum.sub('_', filename)
105
106
def _nanobench_json(results, properties, key):
  """Build a dict of results laid out like nanobench's JSON output.

  results is an iterable of (percent, lines_not_covered, filename) tuples.
  The entries of properties become top-level entries of the returned dict.
  """
  output = {}
  # 'key' and 'results' are assigned after the properties are copied in, so
  # that identically-named entries in properties cannot clobber them.
  output.update(properties)
  output['key'] = key

  per_test = {}
  for percent, not_covered_lines, fullname in results:
    test_name = _testname(fullname)
    options = {
      'fullname': fullname,
      'dir': os.path.dirname(fullname),
      'source_type': 'coverage',
    }
    per_test[test_name] = {
      'coverage': {
        'percent': percent,
        'lines_not_covered': not_covered_lines,
        'options': options,
      },
    }
  output['results'] = per_test
  return output
129
130
def _parse_key_value(kv_list):
  """Return a dict whose key/value pairs are derived from the given list.

  For example:

      ['k1', 'v1', 'k2', 'v2']
  becomes:

      {'k1': 'v1',
       'k2': 'v2'}

  Args:
    kv_list: a flat sequence alternating keys and values.

  Returns:
    A dict pairing each even-indexed element with the element following it.
    If a key occurs more than once, its last value wins.

  Raises:
    ValueError: if kv_list has an odd number of elements.
  """
  if len(kv_list) % 2 != 0:
    # Wrap in a 1-tuple so that a tuple argument is formatted as a whole.
    raise ValueError('Invalid key/value pairs: %s' % (kv_list,))

  # Pair up the even- and odd-indexed elements. Slicing avoids xrange and
  # integer "/", neither of which behaves the same on Python 2 and Python 3.
  return dict(zip(kv_list[::2], kv_list[1::2]))
149
150
def _get_per_file_summaries(line_by_line):
  """Summarize the full line-by-line coverage report by file.

  Args:
    line_by_line: dict mapping file paths to lists of
        (lineno, coverage, code) tuples, where coverage is a number or None
        for lines which carry no coverage information.

  Returns:
    A list of (percent_covered, num_lines_not_covered, filepath) tuples, one
    for each file which has at least one line with coverage information.
    Files without any such line are left out.
  """
  per_file = []
  # items() is available on both Python 2 and Python 3 dicts; iteritems() is
  # Python 2 only.
  for filepath, lines in line_by_line.items():
    counts = [cov for _, cov, _ in lines if cov is not None]
    if not counts:
      # Nothing measurable in this file; also avoids dividing by zero.
      continue
    total_lines = len(counts)
    covered_lines = sum(1 for cov in counts if cov > 0)
    per_file.append((float(covered_lines) / float(total_lines) * 100.0,
                     total_lines - covered_lines,
                     filepath))
  return per_file
167
168
def main():
  """Read a coverage report and write the requested output files.

  --report names the input. --linebyline and --nanobench each name an
  optional JSON output file; --nanobench additionally requires --key and
  --properties.
  """
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--report', required=True,
      help='input file; an llvm coverage report.')
  arg_parser.add_argument(
      '--nanobench', help='output file for nanobench data.')
  arg_parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  arg_parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  arg_parser.add_argument(
      '--linebyline', help='output file for line-by-line JSON data.')
  options = arg_parser.parse_args()

  # Fail before doing any work if the nanobench output is under-specified.
  if options.nanobench:
    if not options.key or not options.properties:
      raise Exception('--key and --properties are required with --nanobench')

  with open(options.report) as report_file:
    report_text = report_file.read()
  coverage = _get_per_file_per_line_coverage(report_text)

  if options.linebyline:
    with open(options.linebyline, 'w') as out_file:
      json.dump(coverage, out_file)

  if not options.nanobench:
    return

  # The flat key/value argument lists become the dicts embedded in the output.
  bot_key = _parse_key_value(options.key)
  build_properties = _parse_key_value(options.properties)

  # Condense the per-line data to one summary per file, then format it.
  summaries = _get_per_file_summaries(coverage)
  nanobench_data = _nanobench_json(summaries, build_properties, bot_key)
  with open(options.nanobench, 'w') as out_file:
    json.dump(nanobench_data, out_file)
210
211
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
214