17757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch"""Code parsing for Coverage."""
27757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
37757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochimport opcode, re, sys, token, tokenize
47757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
57757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.backward import set, sorted, StringIO # pylint: disable=W0622
67757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.backward import open_source
77757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.bytecode import ByteCodes, CodeObjects
87757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.misc import nice_pair, expensive, join_regex
97757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.misc import CoverageException, NoSource, NotPython
107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochclass CodeParser(object):
137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    """Parse code to find executable lines, excluded lines, etc."""
147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def __init__(self, text=None, filename=None, exclude=None):
167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Source can be provided as `text`, the text itself, or `filename`, from
187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        which the text will be read.  Excluded lines are those that match
197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        `exclude`, a regex.
207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        assert text or filename, "CodeParser needs either text or filename"
237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.filename = filename or "<code>"
247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.text = text
257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if not self.text:
267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            try:
277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                sourcef = open_source(self.filename)
287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                try:
297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    self.text = sourcef.read()
307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                finally:
317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    sourcef.close()
327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            except IOError:
337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                _, err, _ = sys.exc_info()
347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                raise NoSource(
357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    "No source for code: %r: %s" % (self.filename, err)
367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    )
377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.exclude = exclude
397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.show_tokens = False
417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # The text lines of the parsed code.
437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.lines = self.text.split('\n')
447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # The line numbers of excluded lines of code.
467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.excluded = set()
477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # The line numbers of docstring lines.
497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.docstrings = set()
507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # The line numbers of class definitions.
527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.classdefs = set()
537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # A dict mapping line numbers to (lo,hi) for multi-line statements.
557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.multiline = {}
567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # The line numbers that start statements.
587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.statement_starts = set()
597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Lazily-created ByteParser
617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self._byte_parser = None
627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _get_byte_parser(self):
647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Create a ByteParser on demand."""
657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if not self._byte_parser:
667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            self._byte_parser = \
677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            ByteParser(text=self.text, filename=self.filename)
687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return self._byte_parser
697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    byte_parser = property(_get_byte_parser)
707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def lines_matching(self, *regexes):
727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Find the lines matching one of a list of regexes.
737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Returns a set of line numbers, the lines that contain a match for one
757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        of the regexes in `regexes`.  The entire line needn't match, just a
767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        part of it.
777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        regex_c = re.compile(join_regex(regexes))
807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        matches = set()
817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for i, ltext in enumerate(self.lines):
827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if regex_c.search(ltext):
837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                matches.add(i+1)
847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return matches
857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _raw_parse(self):
877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Parse the source to find the interesting facts about its lines.
887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        A handful of member fields are updated.
907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Find lines which match an exclusion pattern.
937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if self.exclude:
947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            self.excluded = self.lines_matching(self.exclude)
957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Tokenize, to find excluded suites, to find docstrings, and to find
977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # multi-line statements.
987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        indent = 0
997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        exclude_indent = 0
1007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        excluding = False
1017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        prev_toktype = token.INDENT
1027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        first_line = None
1037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        empty = True
1047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
1067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
1077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if self.show_tokens:                # pragma: no cover
1087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                print("%10s %5s %-20r %r" % (
1097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    tokenize.tok_name.get(toktype, toktype),
1107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    nice_pair((slineno, elineno)), ttext, ltext
1117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    ))
1127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if toktype == token.INDENT:
1137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                indent += 1
1147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            elif toktype == token.DEDENT:
1157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                indent -= 1
1167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            elif toktype == token.NAME and ttext == 'class':
1177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Class definitions look like branches in the byte code, so
1187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # we need to exclude them.  The simplest way is to note the
1197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # lines with the 'class' keyword.
1207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                self.classdefs.add(slineno)
1217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            elif toktype == token.OP and ttext == ':':
1227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if not excluding and elineno in self.excluded:
1237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # Start excluding a suite.  We trigger off of the colon
1247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # token so that the #pragma comment will be recognized on
1257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # the same line as the colon.
1267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    exclude_indent = indent
1277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    excluding = True
1287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            elif toktype == token.STRING and prev_toktype == token.INDENT:
1297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Strings that are first on an indented line are docstrings.
1307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # (a trick from trace.py in the stdlib.) This works for
1317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # 99.9999% of cases.  For the rest (!) see:
1327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
1337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                for i in range(slineno, elineno+1):
1347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    self.docstrings.add(i)
1357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            elif toktype == token.NEWLINE:
1367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if first_line is not None and elineno != first_line:
1377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # We're at the end of a line, and we've ended on a
1387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # different line than the first line of the statement,
1397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # so record a multi-line range.
1407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    rng = (first_line, elineno)
1417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    for l in range(first_line, elineno+1):
1427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        self.multiline[l] = rng
1437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                first_line = None
1447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if ttext.strip() and toktype != tokenize.COMMENT:
1467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # A non-whitespace token.
1477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                empty = False
1487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if first_line is None:
1497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # The token is not whitespace, and is the first in a
1507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # statement.
1517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    first_line = slineno
1527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # Check whether to end an excluded suite.
1537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    if excluding and indent <= exclude_indent:
1547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        excluding = False
1557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    if excluding:
1567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        self.excluded.add(elineno)
1577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            prev_toktype = toktype
1597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Find the starts of the executable statements.
1617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if not empty:
1627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            self.statement_starts.update(self.byte_parser._find_statements())
1637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def first_line(self, line):
1657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Return the first line number of the statement including `line`."""
1667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        rng = self.multiline.get(line)
1677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if rng:
1687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            first_line = rng[0]
1697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        else:
1707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            first_line = line
1717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return first_line
1727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def first_lines(self, lines, ignore=None):
1747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Map the line numbers in `lines` to the correct first line of the
1757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        statement.
1767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Skip any line mentioned in `ignore`.
1787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Returns a sorted list of the first lines.
1807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
1827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        ignore = ignore or []
1837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        lset = set()
1847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for l in lines:
1857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if l in ignore:
1867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                continue
1877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            new_l = self.first_line(l)
1887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if new_l not in ignore:
1897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                lset.add(new_l)
1907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return sorted(lset)
1917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def parse_source(self):
1937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Parse source text to find executable lines, excluded lines, etc.
1947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Return values are 1) a sorted list of executable line numbers, and
1967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        2) a sorted list of excluded line numbers.
1977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
1987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Reported line numbers are normalized to the first line of multi-line
1997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        statements.
2007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
2027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self._raw_parse()
2037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        excluded_lines = self.first_lines(self.excluded)
2057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        ignore = excluded_lines + list(self.docstrings)
2067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        lines = self.first_lines(self.statement_starts, ignore)
2077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return lines, excluded_lines
2097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def arcs(self):
2117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Get information about the arcs available in the code.
2127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Returns a sorted list of line number pairs.  Line numbers have been
2147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        normalized to the first line of multiline statements.
2157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
2177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        all_arcs = []
2187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for l1, l2 in self.byte_parser._all_arcs():
2197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            fl1 = self.first_line(l1)
2207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            fl2 = self.first_line(l2)
2217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if fl1 != fl2:
2227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                all_arcs.append((fl1, fl2))
2237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return sorted(all_arcs)
2247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    arcs = expensive(arcs)
2257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def exit_counts(self):
2277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Get a mapping from line numbers to count of exits from that line.
2287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Excluded lines are excluded.
2307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
2327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        excluded_lines = self.first_lines(self.excluded)
2337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        exit_counts = {}
2347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for l1, l2 in self.arcs():
2357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if l1 < 0:
2367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Don't ever report -1 as a line number
2377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                continue
2387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if l1 in excluded_lines:
2397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Don't report excluded lines as line numbers.
2407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                continue
2417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if l2 in excluded_lines:
2427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Arcs to excluded lines shouldn't count.
2437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                continue
2447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if l1 not in exit_counts:
2457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                exit_counts[l1] = 0
2467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            exit_counts[l1] += 1
2477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Class definitions have one extra exit, so remove one for each:
2497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for l in self.classdefs:
2507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # Ensure key is there: classdefs can include excluded lines.
2517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if l in exit_counts:
2527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                exit_counts[l] -= 1
2537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return exit_counts
2557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    exit_counts = expensive(exit_counts)
2567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch## Opcodes that guide the ByteParser.
2597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochdef _opcode(name):
2617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    """Return the opcode by name from the opcode module."""
2627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    return opcode.opmap[name]
2637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochdef _opcode_set(*names):
2657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    """Return a set of opcodes by the names in `names`."""
2667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    s = set()
2677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    for name in names:
2687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        try:
2697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            s.add(_opcode(name))
2707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        except KeyError:
2717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            pass
2727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    return s
2737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that leave the code object.
2757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_CODE_END = _opcode_set('RETURN_VALUE')
2767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that unconditionally end the code chunk.
2787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_CHUNK_END = _opcode_set(
2797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
2807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    'BREAK_LOOP', 'CONTINUE_LOOP',
2817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    )
2827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
2847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# with unconditional jump instructions, we neatly deal with jumps to jumps
2857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# properly.
2867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
2877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that push a block on the block stack.
2897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_PUSH_BLOCK = _opcode_set(
2907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
2917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    )
2927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Block types for exception handling.
2947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
2957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that pop a block from the block stack.
2977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_POP_BLOCK = _opcode_set('POP_BLOCK')
2987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
2997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that have a jump destination, but aren't really a jump.
3007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_NO_JUMP = OPS_PUSH_BLOCK
3017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Individual opcodes we need below.
3037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_BREAK_LOOP = _opcode('BREAK_LOOP')
3047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_END_FINALLY = _opcode('END_FINALLY')
3057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_COMPARE_OP = _opcode('COMPARE_OP')
3067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochCOMPARE_EXCEPTION = 10  # just have to get this const from the code.
3077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_LOAD_CONST = _opcode('LOAD_CONST')
3087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_RETURN_VALUE = _opcode('RETURN_VALUE')
3097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochclass ByteParser(object):
3127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    """Parse byte codes to understand the structure of code."""
3137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def __init__(self, code=None, text=None, filename=None):
3157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if code:
3167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            self.code = code
3177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            self.text = text
3187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        else:
3197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if not text:
3207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                assert filename, "If no code or text, need a filename"
3217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                sourcef = open_source(filename)
3227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                try:
3237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    text = sourcef.read()
3247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                finally:
3257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    sourcef.close()
3267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            self.text = text
3277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            try:
3297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Python 2.3 and 2.4 don't like partial last lines, so be sure
3307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # the text ends nicely for them.
3317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                self.code = compile(text + '\n', filename, "exec")
3327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            except SyntaxError:
3337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                _, synerr, _ = sys.exc_info()
3347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                raise NotPython(
3357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
3367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        (filename, synerr.msg, synerr.lineno)
3377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    )
3387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Alternative Python implementations don't always provide all the
3407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # attributes on code objects that we need to do the analysis.
3417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
3427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if not hasattr(self.code, attr):
3437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                raise CoverageException(
3447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    "This implementation of Python doesn't support code "
3457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    "analysis.\n"
3467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    "Run coverage.py under CPython for this command."
3477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    )
3487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def child_parsers(self):
3507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Iterate over all the code objects nested within this one.
3517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        The iteration includes `self` as its first value.
3537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
3557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        children = CodeObjects(self.code)
3567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return [ByteParser(code=c, text=self.text) for c in children]
3577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    # Getting numbers from the lnotab value changed in Py3.0.
3597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    if sys.version_info >= (3, 0):
3607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        def _lnotab_increments(self, lnotab):
3617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            """Return a list of ints from the lnotab bytes in 3.x"""
3627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            return list(lnotab)
3637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    else:
3647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        def _lnotab_increments(self, lnotab):
3657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            """Return a list of ints from the lnotab string in 2.x"""
3667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            return [ord(c) for c in lnotab]
3677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _bytes_lines(self):
3697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Map byte offsets to line numbers in `code`.
3707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Uses co_lnotab described in Python/compile.c to map byte offsets to
3727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]
3737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
3757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Adapted from dis.py in the standard library.
3767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
3777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
3787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        bytes_lines = []
3807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        last_line_num = None
3817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        line_num = self.code.co_firstlineno
3827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        byte_num = 0
3837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for byte_incr, line_incr in zip(byte_increments, line_increments):
3847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if byte_incr:
3857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if line_num != last_line_num:
3867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    bytes_lines.append((byte_num, line_num))
3877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    last_line_num = line_num
3887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                byte_num += byte_incr
3897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            line_num += line_incr
3907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if line_num != last_line_num:
3917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            bytes_lines.append((byte_num, line_num))
3927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return bytes_lines
3937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _find_statements(self):
3957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Find the statements in `self.code`.
3967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
3977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Return a set of line numbers that start statements.  Recurses into all
3987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        code objects reachable from `self.code`.
3997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
4017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        stmts = set()
4027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for bp in self.child_parsers():
4037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # Get all of the lineno information from this code.
4047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            for _, l in bp._bytes_lines():
4057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                stmts.add(l)
4067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return stmts
4077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _split_into_chunks(self):
4097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Split the code object into a list of `Chunk` objects.
4107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Each chunk is only entered at its first instruction, though there can
4127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        be many exits from a chunk.
4137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Returns a list of `Chunk` objects.
4157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
4177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # The list of chunks so far, and the one we're working on.
4197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        chunks = []
4207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        chunk = None
4217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        bytes_lines_map = dict(self._bytes_lines())
4227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # The block stack: loops and try blocks get pushed here for the
4247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # implicit jumps that can occur.
4257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Each entry is a tuple: (block type, destination)
4267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        block_stack = []
4277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Some op codes are followed by branches that should be ignored.  This
4297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # is a count of how many ignores are left.
4307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        ignore_branch = 0
4317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # We have to handle the last two bytecodes specially.
4337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        ult = penult = None
4347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for bc in ByteCodes(self.code.co_code):
4367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # Maybe have to start a new chunk
4377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.offset in bytes_lines_map:
4387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Start a new chunk for each source line number.
4397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if chunk:
4407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    chunk.exits.add(bc.offset)
4417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
4427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunks.append(chunk)
4437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            elif bc.op in OPS_CHUNK_BEGIN:
4447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Jumps deserve their own unnumbered chunk.  This fixes
4457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # problems with jumps to jumps getting confused.
4467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if chunk:
4477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    chunk.exits.add(bc.offset)
4487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunk = Chunk(bc.offset)
4497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunks.append(chunk)
4507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if not chunk:
4527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunk = Chunk(bc.offset)
4537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunks.append(chunk)
4547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # Look at the opcode
4567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
4577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if ignore_branch:
4587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # Someone earlier wanted us to ignore this branch.
4597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    ignore_branch -= 1
4607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                else:
4617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # The opcode has a jump, it's an exit for this chunk.
4627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    chunk.exits.add(bc.jump_to)
4637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.op in OPS_CODE_END:
4657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # The opcode can exit the code object.
4667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunk.exits.add(-self.code.co_firstlineno)
4677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.op in OPS_PUSH_BLOCK:
4687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # The opcode adds a block to the block_stack.
4697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                block_stack.append((bc.op, bc.jump_to))
4707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.op in OPS_POP_BLOCK:
4717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # The opcode pops a block from the block stack.
4727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                block_stack.pop()
4737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.op in OPS_CHUNK_END:
4747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # This opcode forces the end of the chunk.
4757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if bc.op == OP_BREAK_LOOP:
4767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # A break is implicit: jump where the top of the
4777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # block_stack points.
4787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    chunk.exits.add(block_stack[-1][1])
4797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunk = None
4807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.op == OP_END_FINALLY:
4817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if block_stack:
4827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # A break that goes through a finally will jump to whatever
4837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    # block is on top of the stack.
4847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    chunk.exits.add(block_stack[-1][1])
4857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # For the finally clause we need to find the closest exception
4867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # block, and use its jump target as an exit.
4877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                for iblock in range(len(block_stack)-1, -1, -1):
4887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
4897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        chunk.exits.add(block_stack[iblock][1])
4907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        break
4917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
4927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # This is an except clause.  We want to overlook the next
4937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # branch, so that except's don't count as branches.
4947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                ignore_branch += 1
4957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            penult = ult
4977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            ult = bc
4987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
4997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        if chunks:
5007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # The last two bytecodes could be a dummy "return None" that
5017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # shouldn't be counted as real code. Every Python code object seems
5027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # to end with a return, and a "return None" is inserted if there
5037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # isn't an explicit return in the source.
5047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if ult and penult:
5057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
5067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    if self.code.co_consts[penult.arg] is None:
5077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        # This is "return None", but is it dummy?  A real line
5087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        # would be a last chunk all by itself.
5097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        if chunks[-1].byte != penult.offset:
5107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            ex = -self.code.co_firstlineno
5117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            # Split the last chunk
5127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            last_chunk = chunks[-1]
5137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            last_chunk.exits.remove(ex)
5147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            last_chunk.exits.add(penult.offset)
5157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            chunk = Chunk(penult.offset)
5167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            chunk.exits.add(ex)
5177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            chunks.append(chunk)
5187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # Give all the chunks a length.
5207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            chunks[-1].length = bc.next_offset - chunks[-1].byte
5217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            for i in range(len(chunks)-1):
5227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                chunks[i].length = chunks[i+1].byte - chunks[i].byte
5237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return chunks
5257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _arcs(self):
5277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Find the executable arcs in the code.
5287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        Returns a set of pairs, (from,to).  From and to are integer line
5307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        numbers.  If from is < 0, then the arc is an entrance into the code
5317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        object.  If to is < 0, the arc is an exit from the code object.
5327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
5347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        chunks = self._split_into_chunks()
5357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # A map from byte offsets to chunks jumped into.
5377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        byte_chunks = dict([(c.byte, c) for c in chunks])
5387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Build a map from byte offsets to actual lines reached.
5407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        byte_lines = {}
5417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        bytes_to_add = set([c.byte for c in chunks])
5427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        while bytes_to_add:
5447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            byte_to_add = bytes_to_add.pop()
5457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if byte_to_add in byte_lines or byte_to_add < 0:
5467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                continue
5477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            # Which lines does this chunk lead to?
5497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            bytes_considered = set()
5507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            bytes_to_consider = [byte_to_add]
5517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            lines = set()
5527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            while bytes_to_consider:
5547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                byte = bytes_to_consider.pop()
5557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                bytes_considered.add(byte)
5567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                # Find chunk for byte
5587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                try:
5597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    ch = byte_chunks[byte]
5607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                except KeyError:
5617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    for ch in chunks:
5627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        if ch.byte <= byte < ch.byte+ch.length:
5637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            break
5647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    else:
5657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        # No chunk for this byte!
5667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        raise Exception("Couldn't find chunk @ %d" % byte)
5677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    byte_chunks[byte] = ch
5687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                if ch.line:
5707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    lines.add(ch.line)
5717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                else:
5727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    for ex in ch.exits:
5737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        if ex < 0:
5747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            lines.add(ex)
5757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        elif ex not in bytes_considered:
5767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            bytes_to_consider.append(ex)
5777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                bytes_to_add.update(ch.exits)
5797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            byte_lines[byte_to_add] = lines
5817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        # Figure out for each chunk where the exits go.
5837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        arcs = set()
5847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for chunk in chunks:
5857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            if chunk.line:
5867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                for ex in chunk.exits:
5877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    if ex < 0:
5887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        exit_lines = [ex]
5897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    else:
5907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        exit_lines = byte_lines[ex]
5917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                    for exit_line in exit_lines:
5927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                        if chunk.line != exit_line:
5937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch                            arcs.add((chunk.line, exit_line))
5947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for line in byte_lines[0]:
5957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            arcs.add((-1, line))
5967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return arcs
5987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
5997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _all_chunks(self):
6007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Returns a list of `Chunk` objects for this code and its children.
6017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        See `_split_into_chunks` for details.
6037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
6057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        chunks = []
6067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for bp in self.child_parsers():
6077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            chunks.extend(bp._split_into_chunks())
6087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return chunks
6107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def _all_arcs(self):
6127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """Get the set of all arcs in this code object and its children.
6137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        See `_arcs` for details.
6157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        """
6177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        arcs = set()
6187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        for bp in self.child_parsers():
6197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            arcs.update(bp._arcs())
6207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return arcs
6227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochclass Chunk(object):
6257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    """A sequence of bytecodes with a single entrance.
6267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    To analyze byte code, we have to divide it into chunks, sequences of byte
6287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    codes such that each basic block has only one entrance, the first
6297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    instruction in the block.
6307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    This is almost the CS concept of `basic block`_, except that we're willing
6327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    to have many exits from a chunk, and "basic block" is a more cumbersome
6337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    term.
6347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    .. _basic block: http://en.wikipedia.org/wiki/Basic_block
6367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    An exit < 0 means the chunk can leave the code (return).  The exit is
6387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    the negative of the starting line number of the code block.
6397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    """
6417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def __init__(self, byte, line=0):
6427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.byte = byte
6437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.line = line
6447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.length = 0
6457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        self.exits = set()
6467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch
6477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch    def __repr__(self):
6487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch        return "<%d+%d @%d %r>" % (
6497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            self.byte, self.length, self.line, list(self.exits)
6507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch            )
651