1"""Core control stuff for Coverage."""
2
3import atexit, os, random, socket, sys
4
5from coverage.annotate import AnnotateReporter
6from coverage.backward import string_class
7from coverage.codeunit import code_unit_factory, CodeUnit
8from coverage.collector import Collector
9from coverage.config import CoverageConfig
10from coverage.data import CoverageData
11from coverage.files import FileLocator, TreeMatcher, FnmatchMatcher
12from coverage.files import PathAliases, find_python_files
13from coverage.html import HtmlReporter
14from coverage.misc import CoverageException, bool_or_none, join_regex
15from coverage.results import Analysis, Numbers
16from coverage.summary import SummaryReporter
17from coverage.xmlreport import XmlReporter
18
19class coverage(object):
20    """Programmatic access to Coverage.
21
22    To use::
23
24        from coverage import coverage
25
26        cov = coverage()
27        cov.start()
28        #.. blah blah (run your code) blah blah ..
29        cov.stop()
30        cov.html_report(directory='covhtml')
31
32    """
    def __init__(self, data_file=None, data_suffix=None, cover_pylib=None,
                auto_data=False, timid=None, branch=None, config_file=True,
                source=None, omit=None, include=None):
        """Create a coverage object, configured from several layered sources.

        Settings are applied in this order, later ones overriding earlier
        ones: built-in defaults, the config file, environment variables, and
        finally the arguments given here.

        `data_file` is the base name of the data file to use, defaulting to
        ".coverage".  `data_suffix` is appended (with a dot) to `data_file` to
        create the final file name.  If `data_suffix` is simply True, then a
        suffix is created with the machine and process identity included.

        `cover_pylib` is a boolean determining whether Python code installed
        with the Python interpreter is measured.  This includes the Python
        standard library and any packages installed with the interpreter.

        If `auto_data` is true, then any existing data file will be read when
        coverage measurement starts, and data will be saved automatically when
        measurement stops.

        If `timid` is true, then a slower and simpler trace function will be
        used.  This is important for some environments where manipulation of
        tracing functions breaks the faster trace function.

        If `branch` is true, then branch coverage will be measured in addition
        to the usual statement coverage.

        `config_file` determines what config file to read.  If it is a string,
        it is the name of the config file to read.  If it is True, then a
        standard file is read (".coveragerc").  If it is False, then no file is
        read.

        `source` is a list of file paths or package names.  Only code located
        in the trees indicated by the file paths or package names will be
        measured.

        `include` and `omit` are lists of filename patterns. Files that match
        `include` will be measured, files that match `omit` will not.  Each
        will also accept a single string argument.

        Raises `CoverageException` if reading the config file raises a
        ValueError.

        """
        from coverage import __version__

        # A record of all the warnings that have been issued.
        self._warnings = []

        # Build our configuration from a number of sources:
        # 1: defaults:
        self.config = CoverageConfig()

        # 2: from the coveragerc file:
        if config_file:
            if config_file is True:
                config_file = ".coveragerc"
            try:
                self.config.from_file(config_file)
            except ValueError:
                # Fetch the exception via sys.exc_info() rather than with
                # "except ... as" syntax.
                _, err, _ = sys.exc_info()
                raise CoverageException(
                    "Couldn't read config file %s: %s" % (config_file, err)
                    )

        # 3: from environment variables:
        self.config.from_environment('COVERAGE_OPTIONS')
        env_data_file = os.environ.get('COVERAGE_FILE')
        if env_data_file:
            self.config.data_file = env_data_file

        # 4: from constructor arguments:
        # A single pattern string is accepted as shorthand for a one-item list.
        if isinstance(omit, string_class):
            omit = [omit]
        if isinstance(include, string_class):
            include = [include]
        self.config.from_args(
            data_file=data_file, cover_pylib=cover_pylib, timid=timid,
            branch=branch, parallel=bool_or_none(data_suffix),
            source=source, omit=omit, include=include
            )

        self.auto_data = auto_data
        # Whether `save` has already been registered with atexit by `start`.
        self.atexit_registered = False

        # _exclude_re is a dict mapping exclusion list names to compiled
        # regexes.
        self._exclude_re = {}
        self._exclude_regex_stale()

        self.file_locator = FileLocator()

        # The source argument can be directories or package names.
        # Entries that exist on disk are treated as paths; everything else is
        # assumed to be a package name to be resolved once it is imported.
        self.source = []
        self.source_pkgs = []
        for src in self.config.source or []:
            if os.path.exists(src):
                self.source.append(self.file_locator.canonical_filename(src))
            else:
                self.source_pkgs.append(src)

        self.omit = self._prep_patterns(self.config.omit)
        self.include = self._prep_patterns(self.config.include)

        self.collector = Collector(
            self._should_trace, timid=self.config.timid,
            branch=self.config.branch, warn=self._warn
            )

        # Suffixes are a bit tricky.  We want to use the data suffix only when
        # collecting data, not when combining data.  So we save it as
        # `self.run_suffix` now, and promote it to `self.data_suffix` if we
        # find that we are collecting data later.
        if data_suffix or self.config.parallel:
            if not isinstance(data_suffix, string_class):
                # if data_suffix=True, use .machinename.pid.random
                data_suffix = True
        else:
            data_suffix = None
        self.data_suffix = None
        self.run_suffix = data_suffix

        # Create the data file.  We do this at construction time so that the
        # data file will be written into the directory where the process
        # started rather than wherever the process eventually chdir'd to.
        self.data = CoverageData(
            basename=self.config.data_file,
            collector="coverage v%s" % __version__
            )

        # The dirs for files considered "installed with the interpreter".
        self.pylib_dirs = []
        if not self.config.cover_pylib:
            # Look at where some standard modules are located. That's the
            # indication for "installed with the interpreter". In some
            # environments (virtualenv, for example), these modules may be
            # spread across a few locations. Look at all the candidate modules
            # we've imported, and take all the different ones.
            for m in (atexit, os, random, socket):
                # Modules without a __file__ attribute are skipped.
                if hasattr(m, "__file__"):
                    m_dir = self._canonical_dir(m.__file__)
                    if m_dir not in self.pylib_dirs:
                        self.pylib_dirs.append(m_dir)

        # To avoid tracing the coverage code itself, we skip anything located
        # where we are.
        self.cover_dir = self._canonical_dir(__file__)

        # The matchers for _should_trace, created when tracing starts.
        self.source_match = None
        self.pylib_match = self.cover_match = None
        self.include_match = self.omit_match = None

        # Only _harvest_data once per measurement cycle.
        self._harvested = False

        # Set the reporting precision.
        Numbers.set_precision(self.config.precision)

        # When tearing down the coverage object, modules can become None.
        # Saving the modules as object attributes avoids problems, but it is
        # quite ad-hoc which modules need to be saved and which references
        # need to use the object attributes.
        self.socket = socket
        self.os = os
        self.random = random
193
194    def _canonical_dir(self, f):
195        """Return the canonical directory of the file `f`."""
196        return os.path.split(self.file_locator.canonical_filename(f))[0]
197
198    def _source_for_file(self, filename):
199        """Return the source file for `filename`."""
200        if not filename.endswith(".py"):
201            if filename[-4:-1] == ".py":
202                filename = filename[:-1]
203        return filename
204
205    def _should_trace(self, filename, frame):
206        """Decide whether to trace execution in `filename`
207
208        This function is called from the trace function.  As each new file name
209        is encountered, this function determines whether it is traced or not.
210
211        Returns a canonicalized filename if it should be traced, False if it
212        should not.
213
214        """
215        if os is None:
216            return False
217
218        if filename.startswith('<'):
219            # Lots of non-file execution is represented with artificial
220            # filenames like "<string>", "<doctest readme.txt[0]>", or
221            # "<exec_function>".  Don't ever trace these executions, since we
222            # can't do anything with the data later anyway.
223            return False
224
225        if filename.endswith(".html"):
226            # Jinja and maybe other templating systems compile templates into
227            # Python code, but use the template filename as the filename in
228            # the compiled code.  Of course, those filenames are useless later
229            # so don't bother collecting.  TODO: How should we really separate
230            # out good file extensions from bad?
231            return False
232
233        self._check_for_packages()
234
235        # Compiled Python files have two filenames: frame.f_code.co_filename is
236        # the filename at the time the .pyc was compiled.  The second name is
237        # __file__, which is where the .pyc was actually loaded from.  Since
238        # .pyc files can be moved after compilation (for example, by being
239        # installed), we look for __file__ in the frame and prefer it to the
240        # co_filename value.
241        dunder_file = frame.f_globals.get('__file__')
242        if dunder_file:
243            filename = self._source_for_file(dunder_file)
244
245        # Jython reports the .class file to the tracer, use the source file.
246        if filename.endswith("$py.class"):
247            filename = filename[:-9] + ".py"
248
249        canonical = self.file_locator.canonical_filename(filename)
250
251        # If the user specified source or include, then that's authoritative
252        # about the outer bound of what to measure and we don't have to apply
253        # any canned exclusions. If they didn't, then we have to exclude the
254        # stdlib and coverage.py directories.
255        if self.source_match:
256            if not self.source_match.match(canonical):
257                return False
258        elif self.include_match:
259            if not self.include_match.match(canonical):
260                return False
261        else:
262            # If we aren't supposed to trace installed code, then check if this
263            # is near the Python standard library and skip it if so.
264            if self.pylib_match and self.pylib_match.match(canonical):
265                return False
266
267            # We exclude the coverage code itself, since a little of it will be
268            # measured otherwise.
269            if self.cover_match and self.cover_match.match(canonical):
270                return False
271
272        # Check the file against the omit pattern.
273        if self.omit_match and self.omit_match.match(canonical):
274            return False
275
276        return canonical
277
    # Debugging aid, disabled by default.
    # To log what should_trace returns, change this to "if 1:"
    # When enabled, the method defined above is kept under the name
    # `_real_should_trace`, and `_should_trace` is rebound to a wrapper that
    # prints each filename and the decision made for it.
    if 0:
        _real_should_trace = _should_trace
        def _should_trace(self, filename, frame):   # pylint: disable=E0102
            """A logging decorator around the real _should_trace function."""
            ret = self._real_should_trace(filename, frame)
            print("should_trace: %r -> %r" % (filename, ret))
            return ret
286
287    def _warn(self, msg):
288        """Use `msg` as a warning."""
289        self._warnings.append(msg)
290        sys.stderr.write("Coverage.py warning: %s\n" % msg)
291
292    def _prep_patterns(self, patterns):
293        """Prepare the file patterns for use in a `FnmatchMatcher`.
294
295        If a pattern starts with a wildcard, it is used as a pattern
296        as-is.  If it does not start with a wildcard, then it is made
297        absolute with the current directory.
298
299        If `patterns` is None, an empty list is returned.
300
301        """
302        patterns = patterns or []
303        prepped = []
304        for p in patterns or []:
305            if p.startswith("*") or p.startswith("?"):
306                prepped.append(p)
307            else:
308                prepped.append(self.file_locator.abs_file(p))
309        return prepped
310
311    def _check_for_packages(self):
312        """Update the source_match matcher with latest imported packages."""
313        # Our self.source_pkgs attribute is a list of package names we want to
314        # measure.  Each time through here, we see if we've imported any of
315        # them yet.  If so, we add its file to source_match, and we don't have
316        # to look for that package any more.
317        if self.source_pkgs:
318            found = []
319            for pkg in self.source_pkgs:
320                try:
321                    mod = sys.modules[pkg]
322                except KeyError:
323                    continue
324
325                found.append(pkg)
326
327                try:
328                    pkg_file = mod.__file__
329                except AttributeError:
330                    self._warn("Module %s has no Python source." % pkg)
331                else:
332                    d, f = os.path.split(pkg_file)
333                    if f.startswith('__init__.'):
334                        # This is actually a package, return the directory.
335                        pkg_file = d
336                    else:
337                        pkg_file = self._source_for_file(pkg_file)
338                    pkg_file = self.file_locator.canonical_filename(pkg_file)
339                    self.source.append(pkg_file)
340                    self.source_match.add(pkg_file)
341
342            for pkg in found:
343                self.source_pkgs.remove(pkg)
344
345    def use_cache(self, usecache):
346        """Control the use of a data file (incorrectly called a cache).
347
348        `usecache` is true or false, whether to read and write data on disk.
349
350        """
351        self.data.usefile(usecache)
352
353    def load(self):
354        """Load previously-collected coverage data from the data file."""
355        self.collector.reset()
356        self.data.read()
357
358    def start(self):
359        """Start measuring code coverage."""
360        if self.run_suffix:
361            # Calling start() means we're running code, so use the run_suffix
362            # as the data_suffix when we eventually save the data.
363            self.data_suffix = self.run_suffix
364        if self.auto_data:
365            self.load()
366            # Save coverage data when Python exits.
367            if not self.atexit_registered:
368                atexit.register(self.save)
369                self.atexit_registered = True
370
371        # Create the matchers we need for _should_trace
372        if self.source or self.source_pkgs:
373            self.source_match = TreeMatcher(self.source)
374        else:
375            if self.cover_dir:
376                self.cover_match = TreeMatcher([self.cover_dir])
377            if self.pylib_dirs:
378                self.pylib_match = TreeMatcher(self.pylib_dirs)
379        if self.include:
380            self.include_match = FnmatchMatcher(self.include)
381        if self.omit:
382            self.omit_match = FnmatchMatcher(self.omit)
383
384        self._harvested = False
385        self.collector.start()
386
387    def stop(self):
388        """Stop measuring code coverage."""
389        self.collector.stop()
390        self._harvest_data()
391
392    def erase(self):
393        """Erase previously-collected coverage data.
394
395        This removes the in-memory data collected in this session as well as
396        discarding the data file.
397
398        """
399        self.collector.reset()
400        self.data.erase()
401
402    def clear_exclude(self, which='exclude'):
403        """Clear the exclude list."""
404        setattr(self.config, which + "_list", [])
405        self._exclude_regex_stale()
406
407    def exclude(self, regex, which='exclude'):
408        """Exclude source lines from execution consideration.
409
410        A number of lists of regular expressions are maintained.  Each list
411        selects lines that are treated differently during reporting.
412
413        `which` determines which list is modified.  The "exclude" list selects
414        lines that are not considered executable at all.  The "partial" list
415        indicates lines with branches that are not taken.
416
417        `regex` is a regular expression.  The regex is added to the specified
418        list.  If any of the regexes in the list is found in a line, the line
419        is marked for special treatment during reporting.
420
421        """
422        excl_list = getattr(self.config, which + "_list")
423        excl_list.append(regex)
424        self._exclude_regex_stale()
425
426    def _exclude_regex_stale(self):
427        """Drop all the compiled exclusion regexes, a list was modified."""
428        self._exclude_re.clear()
429
430    def _exclude_regex(self, which):
431        """Return a compiled regex for the given exclusion list."""
432        if which not in self._exclude_re:
433            excl_list = getattr(self.config, which + "_list")
434            self._exclude_re[which] = join_regex(excl_list)
435        return self._exclude_re[which]
436
437    def get_exclude_list(self, which='exclude'):
438        """Return a list of excluded regex patterns.
439
440        `which` indicates which list is desired.  See `exclude` for the lists
441        that are available, and their meaning.
442
443        """
444        return getattr(self.config, which + "_list")
445
446    def save(self):
447        """Save the collected coverage data to the data file."""
448        data_suffix = self.data_suffix
449        if data_suffix is True:
450            # If data_suffix was a simple true value, then make a suffix with
451            # plenty of distinguishing information.  We do this here in
452            # `save()` at the last minute so that the pid will be correct even
453            # if the process forks.
454            data_suffix = "%s.%s.%06d" % (
455                self.socket.gethostname(), self.os.getpid(),
456                self.random.randint(0, 99999)
457                )
458
459        self._harvest_data()
460        self.data.write(suffix=data_suffix)
461
462    def combine(self):
463        """Combine together a number of similarly-named coverage data files.
464
465        All coverage data files whose name starts with `data_file` (from the
466        coverage() constructor) will be read, and combined together into the
467        current measurements.
468
469        """
470        aliases = None
471        if self.config.paths:
472            aliases = PathAliases(self.file_locator)
473            for paths in self.config.paths.values():
474                result = paths[0]
475                for pattern in paths[1:]:
476                    aliases.add(pattern, result)
477        self.data.combine_parallel_data(aliases=aliases)
478
479    def _harvest_data(self):
480        """Get the collected data and reset the collector.
481
482        Also warn about various problems collecting data.
483
484        """
485        if not self._harvested:
486            self.data.add_line_data(self.collector.get_line_data())
487            self.data.add_arc_data(self.collector.get_arc_data())
488            self.collector.reset()
489
490            # If there are still entries in the source_pkgs list, then we never
491            # encountered those packages.
492            for pkg in self.source_pkgs:
493                self._warn("Module %s was never imported." % pkg)
494
495            # Find out if we got any data.
496            summary = self.data.summary()
497            if not summary:
498                self._warn("No data was collected.")
499
500            # Find files that were never executed at all.
501            for src in self.source:
502                for py_file in find_python_files(src):
503                    self.data.touch_file(py_file)
504
505            self._harvested = True
506
507    # Backward compatibility with version 1.
508    def analysis(self, morf):
509        """Like `analysis2` but doesn't return excluded line numbers."""
510        f, s, _, m, mf = self.analysis2(morf)
511        return f, s, m, mf
512
513    def analysis2(self, morf):
514        """Analyze a module.
515
516        `morf` is a module or a filename.  It will be analyzed to determine
517        its coverage statistics.  The return value is a 5-tuple:
518
519        * The filename for the module.
520        * A list of line numbers of executable statements.
521        * A list of line numbers of excluded statements.
522        * A list of line numbers of statements not run (missing from
523          execution).
524        * A readable formatted string of the missing line numbers.
525
526        The analysis uses the source file itself and the current measured
527        coverage data.
528
529        """
530        analysis = self._analyze(morf)
531        return (
532            analysis.filename, analysis.statements, analysis.excluded,
533            analysis.missing, analysis.missing_formatted()
534            )
535
536    def _analyze(self, it):
537        """Analyze a single morf or code unit.
538
539        Returns an `Analysis` object.
540
541        """
542        if not isinstance(it, CodeUnit):
543            it = code_unit_factory(it, self.file_locator)[0]
544
545        return Analysis(self, it)
546
547    def report(self, morfs=None, show_missing=True, ignore_errors=None,
548                file=None,                          # pylint: disable=W0622
549                omit=None, include=None
550                ):
551        """Write a summary report to `file`.
552
553        Each module in `morfs` is listed, with counts of statements, executed
554        statements, missing statements, and a list of lines missed.
555
556        `include` is a list of filename patterns.  Modules whose filenames
557        match those patterns will be included in the report. Modules matching
558        `omit` will not be included in the report.
559
560        """
561        self.config.from_args(
562            ignore_errors=ignore_errors, omit=omit, include=include
563            )
564        reporter = SummaryReporter(
565            self, show_missing, self.config.ignore_errors
566            )
567        reporter.report(morfs, outfile=file, config=self.config)
568
569    def annotate(self, morfs=None, directory=None, ignore_errors=None,
570                    omit=None, include=None):
571        """Annotate a list of modules.
572
573        Each module in `morfs` is annotated.  The source is written to a new
574        file, named with a ",cover" suffix, with each line prefixed with a
575        marker to indicate the coverage of the line.  Covered lines have ">",
576        excluded lines have "-", and missing lines have "!".
577
578        See `coverage.report()` for other arguments.
579
580        """
581        self.config.from_args(
582            ignore_errors=ignore_errors, omit=omit, include=include
583            )
584        reporter = AnnotateReporter(self, self.config.ignore_errors)
585        reporter.report(morfs, config=self.config, directory=directory)
586
587    def html_report(self, morfs=None, directory=None, ignore_errors=None,
588                    omit=None, include=None):
589        """Generate an HTML report.
590
591        See `coverage.report()` for other arguments.
592
593        """
594        self.config.from_args(
595            ignore_errors=ignore_errors, omit=omit, include=include,
596            html_dir=directory,
597            )
598        reporter = HtmlReporter(self, self.config.ignore_errors)
599        reporter.report(morfs, config=self.config)
600
601    def xml_report(self, morfs=None, outfile=None, ignore_errors=None,
602                    omit=None, include=None):
603        """Generate an XML report of coverage results.
604
605        The report is compatible with Cobertura reports.
606
607        Each module in `morfs` is included in the report.  `outfile` is the
608        path to write the file to, "-" will write to stdout.
609
610        See `coverage.report()` for other arguments.
611
612        """
613        self.config.from_args(
614            ignore_errors=ignore_errors, omit=omit, include=include,
615            xml_output=outfile,
616            )
617        file_to_close = None
618        if self.config.xml_output:
619            if self.config.xml_output == '-':
620                outfile = sys.stdout
621            else:
622                outfile = open(self.config.xml_output, "w")
623                file_to_close = outfile
624        try:
625            reporter = XmlReporter(self, self.config.ignore_errors)
626            reporter.report(morfs, outfile=outfile, config=self.config)
627        finally:
628            if file_to_close:
629                file_to_close.close()
630
631    def sysinfo(self):
632        """Return a list of (key, value) pairs showing internal information."""
633
634        import coverage as covmod
635        import platform, re
636
637        try:
638            implementation = platform.python_implementation()
639        except AttributeError:
640            implementation = "unknown"
641
642        info = [
643            ('version', covmod.__version__),
644            ('coverage', covmod.__file__),
645            ('cover_dir', self.cover_dir),
646            ('pylib_dirs', self.pylib_dirs),
647            ('tracer', self.collector.tracer_name()),
648            ('data_path', self.data.filename),
649            ('python', sys.version.replace('\n', '')),
650            ('platform', platform.platform()),
651            ('implementation', implementation),
652            ('cwd', os.getcwd()),
653            ('path', sys.path),
654            ('environment', [
655                ("%s = %s" % (k, v)) for k, v in os.environ.items()
656                    if re.search("^COV|^PY", k)
657                ]),
658            ]
659        return info
660
661
def process_startup():
    """Call this at Python startup to perhaps measure coverage.

    Coverage measurement is started only if the environment variable
    COVERAGE_PROCESS_START is defined and non-empty.  Its value is the
    config file to use.

    There are two ways to configure your Python installation to invoke this
    function when Python starts:

    #. Create or append to sitecustomize.py to add these lines::

        import coverage
        coverage.process_startup()

    #. Create a .pth file in your Python installation containing::

        import coverage; coverage.process_startup()

    """
    config_path = os.environ.get("COVERAGE_PROCESS_START")
    if not config_path:
        return

    cov = coverage(config_file=config_path, auto_data=True)
    measuring_ourselves = os.environ.get("COVERAGE_COVERAGE")
    if measuring_ourselves:
        # Measuring coverage within coverage.py takes yet more trickery.
        cov.cover_dir = "Please measure coverage.py!"
    cov.start()
689