1#!/usr/bin/python
2# Copyright 2015 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Generates incremental code coverage reports for Java code in Chromium.
7
8Usage:
9
10  build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
11    <EMMA file directory> --lines-for-coverage-file
12    <path to file containing lines for coverage>
13
14  Creates a JSON representation of the overall and file coverage stats and saves
15  this information to the specified output file.
16"""
17
18import argparse
19import collections
20import json
21import logging
22import os
23import re
24import sys
25from xml.etree import ElementTree
26
27import devil_chromium
28from devil.utils import run_tests_helper
29
# Coverage states assigned to a single line of Java source. NOT_EXECUTABLE is
# the fallback used for report rows that carry no recognised coverage class.
NOT_EXECUTABLE = -1
NOT_COVERED = 0
COVERED = 1
PARTIALLY_COVERED = 2

# Immutable record describing one line of an EMMA report:
#   lineno: line number within the Java source file.
#   source: text of the source line.
#   covered_status: one of the coverage state constants above.
#   fractional_line_coverage: covered fraction of the line as a float.
LineCoverage = collections.namedtuple(
    'LineCoverage',
    'lineno source covered_status fractional_line_coverage')
39
40
class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool.

  Example HTML (shown in lower case here; the selectors and attribute lookups
  in this class use upper-case tag and attribute names such as TR, CLASS,
  TITLE and HREF):

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
       <td class="l" title="78% line coverage (7 out of 9)">108</td>
       <td title="78% line coverage (7 out of 9 instructions)">
         if (index < 0 || index >= mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
       <td class="l">109</td>
       <td> </td>
    </tr>
    <tr class="c">
       <td class="l">110</td>
       <td>        if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
       <td class="l">111</td>
       <td>            for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

    We can parse this to get:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p; anything else is NOT_EXECUTABLE)
      4. Fractional coverage value (the title's percentage divided by 100 if
         PARTIALLY_COVERED, 1.0 otherwise)
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Children of HTML elements are represented as a list in ElementTree. These
  # constants represent list indices corresponding to relevant child elements.

  # Child 1 contains percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # UTF-8 encoded no-break space (U+00A0).
  _NO_BREAK_SPACE = '\xc2\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Rows whose CLASS attribute is not one of the keys of |_CSS_TO_STATUS| are
    reported as NOT_EXECUTABLE. Only PARTIALLY_COVERED rows get a fractional
    coverage parsed from their TITLE attribute; every other row reports 1.0.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one per matched <TR> row, whose |source|
        is a UTF-8 encoded string with no-break spaces replaced by spaces.
    """
    line_coverage = []
    for tr in self._FindElements(emma_file_path, self._XPATH_SELECT_LOC):
      # Get the coverage status.
      coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)
      # Get the fractional coverage value.
      if coverage_status == PARTIALLY_COVERED:
        title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
        # Parse string that contains percent covered: "83% line coverage ...".
        percent_covered = title_attribute.split('%')[0]
        fractional_coverage = int(percent_covered) / 100.0
      else:
        fractional_coverage = 1.0

      # Get the line number.
      lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
      # Handles oddly formatted HTML (where there is an extra <a> tag): the
      # number then lives in the text of the cell's first child.
      lineno = int(lineno_element.text or
                   lineno_element[self._ELEMENT_CONTAINING_LINENO].text)

      # Get the original line of Java source code. ElementTree reports the
      # text of an empty cell as None, so treat that as an empty line instead
      # of failing on None.encode().
      raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or ''
      utf8_source = raw_source.encode('UTF-8')
      source = utf8_source.replace(self._NO_BREAK_SPACE, ' ')

      line_coverage.append(
          LineCoverage(lineno, source, coverage_status, fractional_coverage))

    return line_coverage

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info. Links without an HREF attribute are ignored, both
    in the package list and in the per-package class lists.

    Returns:
      A dict mapping string representation of Java packages (with class
        names appended) to the corresponding file paths of EMMA HTML files.
    """
    # These <a> elements contain each package name and the path of the file
    # where all classes within said package are listed.
    package_link_elements = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {
      os.path.join(self._base_dir, link.attrib['HREF']): link.text
      for link in package_link_elements if 'HREF' in link.attrib
    }

    package_to_emma = {}
    for package_emma_file_path, package_name in package_links.items():
      # These <a> elements contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_link_elements = self._FindElements(
          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)

      for class_name_element in coverage_file_link_elements:
        href = class_name_element.get('HREF')
        if href is None:
          # Same policy as for package links: an <a> without a target cannot
          # be mapped to a coverage file, so skip it rather than raise.
          continue
        full_package_name = '%s.%s' % (package_name, class_name_element.text)
        package_to_emma[full_package_name] = os.path.join(
            self._emma_files_path, href)

    return package_to_emma

  # pylint: disable=no-self-use
  def _FindElements(self, file_path, xpath_selector):
    """Reads a HTML file and performs an XPath match.

    The file contents are decoded as ISO-8859-1 and re-encoded as UTF-8 before
    being handed to ElementTree, so the file must be well-formed enough to be
    parsed as XML.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of ElementTree.Elements matching the given XPath selector.
        Returns an empty list if there is no match.
    """
    with open(file_path) as f:
      file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
    # The file is already closed here; parsing only needs the bytes.
    root = ElementTree.fromstring(file_contents)
    return root.findall(xpath_selector)
216
217
class _EmmaCoverageStats(object):
  """Computes code coverage stats for Java code using the coverage tool EMMA.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each Java source file. Coverage
  reports are returned as dicts built from plain dicts, lists, numbers and
  strings.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE_MATCH_GROUP = 'package'
  RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java source code file paths to get EMMA
        coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDict(self, lines_for_coverage):
    """Returns a dict containing detailed coverage information.

    Gets detailed coverage stats for each file specified in the
    |lines_for_coverage| dict and the total incremental number of lines covered
    and executable for all files in |lines_for_coverage|. Files for which no
    EMMA data is known are logged and left out of the result.

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of line
        numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
        Contains absolute coverage stats for each file, coverage stats for each
        file's lines specified in |lines_for_coverage|, line by line coverage
        for each file, and overall coverage stats for the lines specified in
        |lines_for_coverage|.
    """
    file_coverage = {}
    # items()/values() instead of iteritems()/itervalues(): same iteration on
    # Python 2, and the only spelling that exists on Python 3.
    for file_path, line_numbers in lines_for_coverage.items():
      file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
      if file_coverage_dict:
        file_coverage[file_path] = file_coverage_dict
      else:
        logging.warning(
            'No code coverage data for %s, skipping.', file_path)

    covered_statuses = [s['incremental'] for s in file_coverage.values()]
    num_covered_lines = sum(s['covered'] for s in covered_statuses)
    num_total_lines = sum(s['total'] for s in covered_statuses)
    return {
      'files': file_coverage,
      'patch': {
        'incremental': {
          'covered': num_covered_lines,
          'total': num_total_lines
        }
      }
    }

  def GetCoverageDictForFile(self, file_path, line_numbers):
    """Returns a dict containing detailed coverage info for the given file.

    Args:
      file_path: The path to the Java source file that we want to create the
        coverage dict for.
      line_numbers: A list of integer line numbers to retrieve additional stats
        for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
        a file, or None if no EMMA file is known for |file_path|.
    """
    if file_path not in self._source_to_emma:
      return None
    emma_file = self._source_to_emma[file_path]
    total_line_coverage = self._emma_parser.GetLineCoverage(emma_file)

    # Membership is tested once per report line (twice, in fact), so build a
    # set once instead of scanning the |line_numbers| list every time.
    changed_lines = set(line_numbers)
    incremental_line_coverage = [line for line in total_line_coverage
                                 if line.lineno in changed_lines]
    line_by_line_coverage = [
      {
        'line': line.source,
        'coverage': line.covered_status,
        'changed': line.lineno in changed_lines,
        'fractional_coverage': line.fractional_line_coverage,
      }
      for line in total_line_coverage
    ]
    total_covered_lines, total_lines = (
        self.GetSummaryStatsForLines(total_line_coverage))
    incremental_covered_lines, incremental_total_lines = (
        self.GetSummaryStatsForLines(incremental_line_coverage))

    return {
      'absolute': {
        'covered': total_covered_lines,
        'total': total_lines
      },
      'incremental': {
        'covered': incremental_covered_lines,
        'total': incremental_total_lines
      },
      'source': line_by_line_coverage,
    }

  # pylint: disable=no-self-use
  def GetSummaryStatsForLines(self, line_coverage):
    """Gets summary stats for a given list of LineCoverage objects.

    NOT_EXECUTABLE lines are ignored. A PARTIALLY_COVERED line counts as one
    executable line but only adds its fractional coverage to the covered total.

    Args:
      line_coverage: A list of LineCoverage objects.

    Returns:
      A tuple containing the number of lines that are covered and the total
        number of lines that are executable, respectively
    """
    partially_covered_sum = 0
    covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        continue
      covered_status_totals[status] += 1
      if status == PARTIALLY_COVERED:
        partially_covered_sum += line.fractional_line_coverage

    total_covered = covered_status_totals[COVERED] + partially_covered_sum
    total_lines = sum(covered_status_totals.values())
    return total_covered, total_lines

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    This method gathers the information needed to correlate EMMA HTML
    files with Java source files. EMMA XML and plain text reports do not provide
    line by line coverage data, so HTML reports must be used instead.
    Unfortunately, the HTML files that are created are given garbage names
    (i.e 1.html) so we need to manually correlate EMMA HTML files
    with the original Java source files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths. Files
        without a package statement, or whose package is not part of the EMMA
        report, are left out.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning("Skipping %s because it doesn't have a package "
                        "statement.", file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    return {source: package_to_emma[package]
            for source, package in source_to_package.items()
            if package in package_to_emma}

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
      return True
    logging.info('Skipping file %s, cannot compute code coverage.', file_path)
    return False

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    The package is taken from the first text in the file that matches
    |RE_PACKAGE|.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name with file name appended or
        None if there is no package statement in the file.
    """
    with open(file_path) as f:
      file_content = f.read()
    # The file is closed before matching; only its text is needed from here.
    package_match = _EmmaCoverageStats.RE_PACKAGE.search(file_content)
    if package_match is None:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    return '%s.%s' % (package, os.path.basename(file_path))
425
426
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. If none of the listed files needs coverage, an empty JSON
  object is written.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).
  """
  with open(line_coverage_file) as f:
    potential_files_for_coverage = json.load(f)

  # items() rather than iteritems(): identical on Python 2 and also available
  # on Python 3. The loop variable no longer reuses the file handle's name.
  files_for_coverage = {
      path: lines
      for path, lines in potential_files_for_coverage.items()
      if _EmmaCoverageStats.NeedsCoverage(path)}

  coverage_results = {}
  if files_for_coverage:
    code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
    coverage_results = code_coverage.GetCoverageDict(files_for_coverage)
  else:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)

  with open(out_file_path, 'w+') as out_status_file:
    json.dump(coverage_results, out_status_file)
458
459
def main():
  """Parses the command line flags and generates the coverage report.

  All of --out, --emma-dir and --lines-for-coverage-file are required. The
  number of -v flags is handed to run_tests_helper.SetLogLevel().
  """
  lines_file_help = (
      'File containing a JSON object. Should contain a dict mapping '
      'file names to lists of line numbers of code for which '
      'coverage information is desired.')

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--out', required=True, type=str, help='Report output file path.')
  parser.add_argument(
      '--emma-dir', required=True, type=str,
      help='EMMA HTML report directory.')
  parser.add_argument(
      '--lines-for-coverage-file', required=True, type=str,
      help=lines_file_help)
  parser.add_argument(
      '-v', '--verbose', action='count',
      help='Print verbose log information.')
  options = parser.parse_args()

  run_tests_helper.SetLogLevel(options.verbose)
  devil_chromium.Initialize()
  GenerateCoverageReport(
      options.lines_for_coverage_file, options.out, options.emma_dir)
476
477
# Only generate a report when run as a script. main() has no return value, so
# sys.exit() receives None.
if __name__ == '__main__':
  sys.exit(main())
480