1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Crocodile - compute coverage numbers for Chrome coverage dashboard."""
7
8import optparse
9import os
10import platform
11import re
12import sys
13import croc_html
14import croc_scan
15
16
class CrocError(Exception):
  """Coverage error; base class for the exceptions defined in this module."""
19
20
class CrocStatError(CrocError):
  """Error evaluating a coverage stat (for example, an unknown stat name)."""
23
24#------------------------------------------------------------------------------
25
26
class CoverageStats(dict):
  """Coverage statistics.

  A plain dict mapping stat name -> value, with helpers to accumulate stats
  from another stats dict and to fill in the well-known stats with zeros.
  """

  # Default dictionary values for this stat.
  DEFAULTS = { 'files_covered': 0,
               'files_instrumented': 0,
               'files_executable': 0,
               'lines_covered': 0,
               'lines_instrumented': 0,
               'lines_executable': 0 }

  def Add(self, coverage_stats):
    """Adds a contribution from another coverage stats dict.

    Stats already present in this object are summed with the incoming value;
    stats not yet present are copied over as-is.

    Args:
      coverage_stats: Statistics to add to this one.
    """
    # items() (not iteritems()) so this runs on both Python 2 and Python 3.
    for k, v in coverage_stats.items():
      if k in self:
        self[k] += v
      else:
        self[k] = v

  def AddDefaults(self):
    """Add some default stats which might be assumed present.

    Do not clobber if already present.  Adds resilience when evaling a
    croc file which expects certain stats to exist.
    """
    for k, v in self.DEFAULTS.items():
      # setdefault() only stores the value when the key is missing.
      self.setdefault(k, v)
58
59#------------------------------------------------------------------------------
60
61
class CoveredFile(object):
  """Information about a single covered file."""

  def __init__(self, filename, **kwargs):
    """Constructor.

    Args:
      filename: Full path to file, '/'-delimited.
      kwargs: Keyword args are attributes for file.
    """
    self.filename = filename
    self.attrs = dict(kwargs)

    # Move these to attrs?
    self.local_path = None      # Local path to file
    self.in_lcov = False        # Is file instrumented?

    # No coverage data for file yet
    self.lines = {}     # line_no -> None=executable, 0=instrumented, 1=covered
    self.stats = CoverageStats()

  def UpdateCoverage(self):
    """Updates the coverage summary based on covered lines.

    Replaces self.stats with a fresh CoverageStats computed from self.lines
    and self.in_lcov.
    """
    exe = instr = cov = 0
    # values() (not itervalues()) so this runs on both Python 2 and Python 3.
    for state in self.lines.values():
      # Every tracked line is executable; non-None means it was instrumented,
      # and 1 means it was actually hit.
      exe += 1
      if state is not None:
        instr += 1
        if state == 1:
          cov += 1

    # Add stats that always exist
    self.stats = CoverageStats(lines_executable=exe,
                               lines_instrumented=instr,
                               lines_covered=cov,
                               files_executable=1)

    # Add conditional stats
    if cov:
      self.stats['files_covered'] = 1
    if instr or self.in_lcov:
      # A file listed in LCOV data counts as instrumented even if it has no
      # instrumented lines.
      self.stats['files_instrumented'] = 1
104
105#------------------------------------------------------------------------------
106
107
class CoveredDir(object):
  """A directory node in the coverage tree.

  Holds the covered files and subdirectories found directly inside the
  directory, plus per-group stat totals for the directory.
  """

  def __init__(self, dirpath):
    """Creates an empty directory node.

    Args:
      dirpath: Full path of directory, '/'-delimited.
    """
    # CoverageStats objects summarizing all children, keyed by group name.
    # The 'all' group always exists.
    self.stats_by_group = {'all': CoverageStats()}
    # TODO: by language

    # Subdirectories, keyed by name (not full path).
    self.subdirs = {}

    # Covered files directly in this dir, keyed by filename (not full path).
    self.files = {}

    self.dirpath = dirpath

  def GetTree(self, indent=''):
    """Renders this directory and, recursively, its subdirectories.

    Each directory produces one line: its name followed by a
    'group:covered/instrumented/executable' entry per group that has
    executable lines.  Subdirectories follow, indented two more spaces.

    Args:
      indent: indent prefix string.

    Returns:
      The tree as a string.
    """
    # One summary per group, in sorted group order; groups without any
    # executable lines are left out.
    group_summaries = []
    for name in sorted(self.stats_by_group):
      totals = self.stats_by_group[name]
      if totals.get('lines_executable'):
        group_summaries.append('%s:%d/%d/%d' % (
            name,
            totals.get('lines_covered', 0),
            totals.get('lines_instrumented', 0),
            totals.get('lines_executable', 0)))

    dir_label = os.path.split(self.dirpath)[1] + '/'
    heading = '%s%-30s   %s' % (indent, dir_label,
                                '   '.join(group_summaries))
    # Trailing padding is stripped when there are no group summaries.
    rows = [heading.rstrip()]

    child_indent = indent + '  '
    rows.extend(self.subdirs[name].GetTree(indent=child_indent)
                for name in sorted(self.subdirs))

    return '\n'.join(rows)
161
162#------------------------------------------------------------------------------
163
164
class Coverage(object):
  """Code coverage for a group of files.

  Collects CoveredFile objects (from LCOV data and/or by scanning source
  directories), classifies them via root-directory replacement and regexp
  rules, and summarizes them into a tree of CoveredDir objects.
  """

  def __init__(self):
    """Constructor."""
    self.files = {}             # Map filename --> CoveredFile
    self.root_dirs = []         # [root, altname] pairs
    self.rules = []             # [compiled regexp, dict of RHS attrs] pairs
    self.tree = CoveredDir('')
    self.print_stats = []       # Dicts of args to PrintStat()

    # Functions which need to be replaced for unit testing
    self.add_files_walk = os.walk         # Walk function for AddFiles()
    self.scan_file = croc_scan.ScanFile   # Source scanner for AddFiles()

  def CleanupFilename(self, filename):
    """Cleans up a filename.

    Args:
      filename: Input filename.

    Returns:
      The cleaned up filename.

    Changes all path separators to '/'.
    Makes relative paths (those starting with '../' or './') absolute.
    Replaces all instances of root dirs with alternate names.
    """
    # Change path separators
    filename = filename.replace('\\', '/')

    # Windows doesn't care about case sensitivity.
    ignore_case = platform.system() in ('Windows', 'Microsoft')
    if ignore_case:
      filename = filename.lower()

    # If path is relative, make it absolute
    # TODO: Perhaps we should default to relative instead, and only understand
    # absolute to be files starting with '\', '/', or '[A-Za-z]:'?
    if filename.split('/')[0] in ('.', '..'):
      filename = os.path.abspath(filename).replace('\\', '/')

    # Replace alternate roots
    for root, alt_name in self.root_dirs:
      if ignore_case:
        root = root.lower()
      # The replacement is supplied as a function so alt_name is inserted
      # literally; a plain string would be treated as a template in which
      # backslashes are escape sequences.
      filename = re.sub('^' + re.escape(root) + '(?=(/|$))',
                        lambda unused_match, repl=alt_name: repl,
                        filename)
    return filename

  def ClassifyFile(self, filename):
    """Applies rules to a filename, to see if we care about it.

    Rules are applied in the order they were added, so attributes set by
    later rules override those set by earlier ones.

    Args:
      filename: Input filename.

    Returns:
      A dict of attributes for the file, accumulated from the right hand sides
          of rules which fired.
    """
    # TODO: Files can belong to multiple groups?
    #   (test/source)
    #   (mac/pc/win)
    #   (media_test/all_tests)
    #   (small/med/large)
    # How to handle that?
    attrs = {}

    # Process all rules
    for regexp, rhs_dict in self.rules:
      if regexp.match(filename):
        attrs.update(rhs_dict)

    return attrs

  def AddRoot(self, root_path, alt_name='_'):
    """Adds a root directory.

    Args:
      root_path: Root directory to add.
      alt_name: If specified, name of root dir.  Otherwise, defaults to '_'.

    Raises:
      ValueError: alt_name was blank.
    """
    # Alt name must not be blank.  If it were, there wouldn't be a way to
    # reverse-resolve from a root-replaced path back to the local path, since
    # '' would always match the beginning of the candidate filename, resulting
    # in an infinite loop.
    if not alt_name:
      raise ValueError('AddRoot alt_name must not be blank.')

    # Clean up root path based on existing rules
    self.root_dirs.append([self.CleanupFilename(root_path), alt_name])

  def AddRule(self, path_regexp, **kwargs):
    """Adds a rule.

    Args:
      path_regexp: Regular expression to match for filenames.  These are
          matched after root directory replacement.
      kwargs: Keyword arguments are attributes to set if the rule applies.

    Keyword arguments currently supported:
      include: If True, includes matches; if False, excludes matches.  Ignored
          if None.
      group: If not None, sets group to apply to matches.
      language: If not None, sets file language to apply to matches.
    """

    # Compile regexp ahead of time
    self.rules.append([re.compile(path_regexp), dict(kwargs)])

  def GetCoveredFile(self, filename, add=False):
    """Gets the CoveredFile object for the filename.

    Args:
      filename: Name of file to find.
      add: If True, will add the file if it's not present.  This applies the
          transformations from AddRoot() and AddRule(), and only adds the file
          if a rule includes it, and it has a group and language.

    Returns:
      The matching CoveredFile object, or None if not present.
    """
    # Clean filename
    filename = self.CleanupFilename(filename)

    # Check for existing match
    if filename in self.files:
      return self.files[filename]

    # File isn't one we know about.  If we can't add it, give up.
    if not add:
      return None

    # Check rules to see if file can be added.  Files must be included and
    # have a group and language.
    attrs = self.ClassifyFile(filename)
    if not (attrs.get('include')
            and attrs.get('group')
            and attrs.get('language')):
      return None

    # Add the file
    f = CoveredFile(filename, **attrs)
    self.files[filename] = f

    # Return the newly covered file
    return f

  def RemoveCoveredFile(self, cov_file):
    """Removes the file from the covered file list.

    Args:
      cov_file: A file object returned by GetCoveredFile().
    """
    self.files.pop(cov_file.filename)

  def ParseLcovData(self, lcov_data):
    """Adds coverage from LCOV-formatted data.

    Only 'SF:', 'DA:' and 'end_of_record' lines are interpreted; all other
    line types are ignored.

    Args:
      lcov_data: An iterable returning lines of data in LCOV format.  For
          example, a file or list of strings.

    Raises:
      ValueError: A 'DA:' line did not hold integer line-number and count
          fields.
    """
    cov_file = None
    cov_lines = None
    for line in lcov_data:
      line = line.strip()
      if line.startswith('SF:'):
        # Start of data for a new file; payload is filename
        cov_file = self.GetCoveredFile(line[3:], add=True)
        if cov_file:
          cov_lines = cov_file.lines
          cov_file.in_lcov = True       # File was instrumented
      elif not cov_file:
        # Inside data for a file we don't care about - so skip it
        pass
      elif line.startswith('DA:'):
        # Data point - that is, an executable line in current file.  The
        # record is 'DA:<line>,<count>[,<checksum>]'; only the first two
        # fields are numeric, so an optional checksum must not be parsed.
        line_no, is_covered = [int(x) for x in line[3:].split(',')[:2]]
        if is_covered:
          # Line is covered
          cov_lines[line_no] = 1
        elif cov_lines.get(line_no) != 1:
          # Line is not covered, so track it as uncovered (but never
          # downgrade a line already marked covered).
          cov_lines[line_no] = 0
      elif line == 'end_of_record':
        cov_file.UpdateCoverage()
        cov_file = None
      # (else ignore other line types)

  def ParseLcovFile(self, input_filename):
    """Adds coverage data from a .lcov file.

    Args:
      input_filename: Input filename.
    """
    # TODO: All manner of error checking
    with open(input_filename, 'rt') as lcov_file:
      self.ParseLcovData(lcov_file)

  def GetStat(self, stat, group='all', default=None):
    """Gets a statistic from the coverage object.

    Args:
      stat: Statistic to get.  May also be an evaluatable python expression,
          using the stats.  For example, 'stat1 - stat2'.  Inside the
          expression, S(...) calls GetStat() itself.
      group: File group to match; if 'all', matches all groups.
      default: Value to return if there was an error evaluating the stat.  For
          example, if the stat does not exist.  If None, raises
          CrocStatError.

    Returns:
      The evaluated stat, or default (when it is not None) if there was an
      error.

    Raises:
      CrocStatError: Error evaluating stat, and default was None.
    """
    # TODO: specify a subdir to get the stat from, then walk the tree to
    # print the stats from just that subdir

    # Make sure the group exists
    if group not in self.tree.stats_by_group:
      if default is None:
        raise CrocStatError('Group %r not found.' % group)
      else:
        return default

    stats = self.tree.stats_by_group[group]
    # Unit tests use real dicts, not CoverageStats objects,
    # so we can't AddDefaults() on them.
    if group == 'all' and hasattr(stats, 'AddDefaults'):
      stats.AddDefaults()
    try:
      # NOTE: eval() of the stat expression.  Builtins are replaced by just
      # S(), but this is not a security sandbox; stat expressions (which come
      # from config files) must be trusted.
      return eval(stat, {'__builtins__': {'S': self.GetStat}}, stats)
    except Exception as e:
      if default is None:
        raise CrocStatError('Error evaluating stat %r: %s' % (stat, e))
      else:
        return default

  def PrintStat(self, stat, format=None, outfile=sys.stdout, **kwargs):
    """Prints a statistic from the coverage object.

    Args:
      stat: Statistic to get.  May also be an evaluatable python expression,
          using the stats.  For example, 'stat1 - stat2'.
      format: Format string to use when printing stat.  If None, prints the
          stat and its evaluation.
      outfile: File stream to output stat to; defaults to stdout.
      kwargs: Additional args to pass to GetStat().
    """
    s = self.GetStat(stat, **kwargs)
    if format is None:
      outfile.write('GetStat(%r) = %s\n' % (stat, s))
    else:
      outfile.write(format % s + '\n')

  def AddFiles(self, src_dir):
    """Adds files to coverage information.

    LCOV files only contains files which are compiled and instrumented as part
    of running coverage.  This function finds missing files and adds them.

    Args:
      src_dir: Directory on disk at which to start search.  May be a relative
          path on disk starting with '.' or '..', or an absolute path, or a
          path relative to an alt_name for one of the roots
          (for example, '_/src').  If the alt_name matches more than one root,
          all matches will be attempted.

    Note that dirs not underneath one of the root dirs and covered by an
    inclusion rule will be ignored.
    """
    # Check for root dir alt_names in the path and replace with the actual
    # root dirs, then recurse.
    found_root = False
    for root, alt_name in self.root_dirs:
      # Function replacement so the root path is inserted literally rather
      # than being interpreted as a replacement template.
      replaced_root = re.sub('^' + re.escape(alt_name) + '(?=(/|$))',
                             lambda unused_match, repl=root: repl,
                             src_dir)
      if replaced_root != src_dir:
        found_root = True
        self.AddFiles(replaced_root)
    if found_root:
      return    # Replaced an alt_name with a root_dir, so already recursed.

    for (dirpath, dirnames, filenames) in self.add_files_walk(src_dir):
      # Make a copy of the dirnames list so we can modify the original to
      # prune subdirs we don't need to walk.
      for d in list(dirnames):
        # Add trailing '/' to directory names so dir-based regexps can match
        # '/' instead of needing to specify '(/|$)'.
        dpath = self.CleanupFilename(dirpath + '/' + d) + '/'
        attrs = self.ClassifyFile(dpath)
        if not attrs.get('include'):
          # Directory has been excluded, so don't traverse it
          # TODO: Document the slight weirdness caused by this: If you
          # AddFiles('./A'), and the rules include 'A/B/C/D' but not 'A/B',
          # then it won't recurse into './A/B' so won't find './A/B/C/D'.
          # Workarounds are to AddFiles('./A/B/C/D') or AddFiles('./A/B/C').
          # The latter works because it explicitly walks the contents of the
          # path passed to AddFiles(), so it finds './A/B/C/D'.
          dirnames.remove(d)

      for f in filenames:
        local_path = dirpath + '/' + f

        covf = self.GetCoveredFile(local_path, add=True)
        if not covf:
          continue

        # Save where we found the file, for generating line-by-line HTML output
        covf.local_path = local_path

        if covf.in_lcov:
          # File already instrumented and doesn't need to be scanned
          continue

        if not covf.attrs.get('add_if_missing', 1):
          # Not allowed to add the file
          self.RemoveCoveredFile(covf)
          continue

        # Scan file to find potentially-executable lines
        lines = self.scan_file(covf.local_path, covf.attrs.get('language'))
        if lines:
          for l in lines:
            covf.lines[l] = None
          covf.UpdateCoverage()
        else:
          # File has no executable lines, so don't count it
          self.RemoveCoveredFile(covf)

  def AddConfig(self, config_data, lcov_queue=None, addfiles_queue=None):
    """Adds JSON-ish config data.

    The data is a Python dict literal which may hold 'roots', 'rules',
    'lcov_files', 'add_files' and 'print_stats' lists.

    Args:
      config_data: Config data string.
      lcov_queue: If not None, object to append lcov_files to instead of
          parsing them immediately.
      addfiles_queue: If not None, object to append add_files to instead of
          processing them immediately.
    """
    # TODO: All manner of error checking
    # NOTE: eval() of config file contents.  Emptying __builtins__ is not a
    # security sandbox; config files must come from a trusted source.
    cfg = eval(config_data, {'__builtins__': {}}, {})

    for rootdict in cfg.get('roots', []):
      self.AddRoot(rootdict['root'], alt_name=rootdict.get('altname', '_'))

    for ruledict in cfg.get('rules', []):
      # The remaining keys of the rule dict are the attributes the rule sets.
      regexp = ruledict.pop('regexp')
      self.AddRule(regexp, **ruledict)

    for add_lcov in cfg.get('lcov_files', []):
      if lcov_queue is not None:
        lcov_queue.append(add_lcov)
      else:
        self.ParseLcovFile(add_lcov)

    for add_path in cfg.get('add_files', []):
      if addfiles_queue is not None:
        addfiles_queue.append(add_path)
      else:
        self.AddFiles(add_path)

    self.print_stats += cfg.get('print_stats', [])

  def ParseConfig(self, filename, **kwargs):
    """Parses a configuration file.

    Args:
      filename: Config filename.
      kwargs: Additional parameters to pass to AddConfig().
    """
    # TODO: All manner of error checking
    with open(filename, 'rt') as f:
      # Need to strip CR's from CRLF-terminated lines or posix systems can't
      # eval the data.
      config_data = f.read().replace('\r\n', '\n')
      # TODO: some sort of include syntax.
      #
      # Needs to be done at string-time rather than at eval()-time, so that
      # it's possible to include parts of dicts.  Path from a file to its
      # include should be relative to the dir containing the file.
      #
      # Or perhaps it could be done after eval.  In that case, there'd be an
      # 'include' section with a list of files to include.  Those would be
      # eval()'d and recursively pre- or post-merged with the including file.
      #
      # Or maybe just don't worry about it, since multiple configs can be
      # specified on the command line.
      self.AddConfig(config_data, **kwargs)

  def UpdateTreeStats(self):
    """Recalculates the tree stats from the currently covered files.

    Rebuilds self.tree from scratch and sums each file's existing stats into
    every directory above it, both under the 'all' group and under the file's
    own group.
    """
    self.tree = CoveredDir('')
    # values() (not itervalues()) so this runs on both Python 2 and Python 3.
    for cov_file in self.files.values():
      # Add the file to the tree
      fdirs = cov_file.filename.split('/')
      parent = self.tree
      ancestors = [parent]
      for d in fdirs[:-1]:
        if d not in parent.subdirs:
          if parent.dirpath:
            parent.subdirs[d] = CoveredDir(parent.dirpath + '/' + d)
          else:
            parent.subdirs[d] = CoveredDir(d)
        parent = parent.subdirs[d]
        ancestors.append(parent)
      # Final subdir actually contains the file
      parent.files[fdirs[-1]] = cov_file

      # Now add file's contribution to coverage by dir
      group = cov_file.attrs.get('group')
      for a in ancestors:
        # Add to 'all' group
        a.stats_by_group['all'].Add(cov_file.stats)

        # Add to group file belongs to.  A file whose group is literally
        # 'all' has already been counted above; adding it again would
        # double its contribution.
        if group == 'all':
          continue
        if group not in a.stats_by_group:
          a.stats_by_group[group] = CoverageStats()
        a.stats_by_group[group].Add(cov_file.stats)

  def PrintTree(self):
    """Prints the tree stats."""
    # Single-argument print() calls behave identically on Python 2 and 3.
    print('Lines of code coverage by directory:')
    print(self.tree.GetTree())
609
610#------------------------------------------------------------------------------
611
612
def Main(argv):
  """Main routine.

  Parses command-line options, loads roots, config files, LCOV inputs and
  additional source directories into a Coverage object, then produces the
  requested reports.

  Args:
    argv: list of arguments.  Only options are used; positional arguments
        (such as the program name when sys.argv is passed) are ignored.

  Returns:
    exit code, 0 for normal exit; 1 if no covered files were found.
  """
  # Parse args
  parser = optparse.OptionParser()
  parser.add_option(
      '-i', '--input', dest='inputs', type='string', action='append',
      metavar='FILE',
      help='read LCOV input from FILE')
  parser.add_option(
      '-r', '--root', dest='roots', type='string', action='append',
      metavar='ROOT[=ALTNAME]',
      help='add ROOT directory, optionally map in coverage results as ALTNAME')
  parser.add_option(
      '-c', '--config', dest='configs', type='string', action='append',
      metavar='FILE',
      help='read settings from configuration FILE')
  parser.add_option(
      '-a', '--addfiles', dest='addfiles', type='string', action='append',
      metavar='PATH',
      help='add files from PATH to coverage data')
  parser.add_option(
      '-t', '--tree', dest='tree', action='store_true',
      help='print tree of code coverage by group')
  parser.add_option(
      '-u', '--uninstrumented', dest='uninstrumented', action='store_true',
      help='list uninstrumented files')
  parser.add_option(
      '-m', '--html', dest='html_out', type='string', metavar='PATH',
      help='write HTML output to PATH')
  parser.add_option(
      '-b', '--base_url', dest='base_url', type='string', metavar='URL',
      help='include URL in base tag of HTML output')

  parser.set_defaults(
      inputs=[],
      roots=[],
      configs=[],
      addfiles=[],
      tree=False,
      html_out=None,
  )

  options = parser.parse_args(args=argv)[0]

  cov = Coverage()

  # Set root directories for coverage
  for root_opt in options.roots:
    if '=' in root_opt:
      # Split on the first '=' only, so an '=' inside ALTNAME does not
      # produce extra positional arguments for AddRoot().
      cov.AddRoot(*root_opt.split('=', 1))
    else:
      cov.AddRoot(root_opt)

  # Read config files.  LCOV files and add-file paths named in the configs
  # are queued onto the option lists so they are processed below, after all
  # configs have been read.
  for config_file in options.configs:
    cov.ParseConfig(config_file, lcov_queue=options.inputs,
                    addfiles_queue=options.addfiles)

  # Parse lcov files
  for input_filename in options.inputs:
    cov.ParseLcovFile(input_filename)

  # Add missing files
  for add_path in options.addfiles:
    cov.AddFiles(add_path)

  # Print help if no files specified
  if not cov.files:
    # Single-argument print() calls behave identically on Python 2 and 3.
    print('No covered files found.')
    parser.print_help()
    return 1

  # Update tree stats
  cov.UpdateTreeStats()

  # Print uninstrumented filenames
  if options.uninstrumented:
    print('Uninstrumented files:')
    for f in sorted(cov.files):
      covf = cov.files[f]
      if not covf.in_lcov:
        print('  %-6s %-6s %s' % (covf.attrs.get('group'),
                                  covf.attrs.get('language'), f))

  # Print tree stats
  if options.tree:
    cov.PrintTree()

  # Print stats
  for ps_args in cov.print_stats:
    cov.PrintStat(**ps_args)

  # Generate HTML
  if options.html_out:
    html = croc_html.CrocHtml(cov, options.html_out, options.base_url)
    html.Write()

  # Normal exit
  return 0
719
720
if __name__ == '__main__':
  # Run as a script: use Main()'s return value as the process exit status.
  exit_status = Main(sys.argv)
  sys.exit(exit_status)
723