17757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch"""Code parsing for Coverage.""" 27757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 37757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochimport opcode, re, sys, token, tokenize 47757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 57757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.backward import set, sorted, StringIO # pylint: disable=W0622 67757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.backward import open_source 77757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.bytecode import ByteCodes, CodeObjects 87757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.misc import nice_pair, expensive, join_regex 97757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochfrom coverage.misc import CoverageException, NoSource, NotPython 107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochclass CodeParser(object): 137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Parse code to find executable lines, excluded lines, etc.""" 147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def __init__(self, text=None, filename=None, exclude=None): 167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Source can be provided as `text`, the text itself, or `filename`, from 187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch which the text will be read. Excluded lines are those that match 197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch `exclude`, a regex. 207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch assert text or filename, "CodeParser needs either text or filename" 237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.filename = filename or "<code>" 247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.text = text 257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if not self.text: 267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch try: 277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch sourcef = open_source(self.filename) 287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch try: 297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.text = sourcef.read() 307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch finally: 317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch sourcef.close() 327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch except IOError: 337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch _, err, _ = sys.exc_info() 347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch raise NoSource( 357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch "No source for code: %r: %s" % (self.filename, err) 367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ) 377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.exclude = exclude 397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.show_tokens = False 417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The text lines of the parsed code. 437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.lines = self.text.split('\n') 447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The line numbers of excluded lines of code. 467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.excluded = set() 477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The line numbers of docstring lines. 497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.docstrings = set() 507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The line numbers of class definitions. 527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.classdefs = set() 537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # A dict mapping line numbers to (lo,hi) for multi-line statements. 557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.multiline = {} 567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The line numbers that start statements. 587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.statement_starts = set() 597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Lazily-created ByteParser 617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self._byte_parser = None 627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _get_byte_parser(self): 647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Create a ByteParser on demand.""" 657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if not self._byte_parser: 667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self._byte_parser = \ 677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ByteParser(text=self.text, filename=self.filename) 687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return self._byte_parser 697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_parser = property(_get_byte_parser) 707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def lines_matching(self, *regexes): 727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Find the lines matching one of a list of regexes. 737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Returns a set of line numbers, the lines that contain a match for one 757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch of the regexes in `regexes`. The entire line needn't match, just a 767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch part of it. 777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch regex_c = re.compile(join_regex(regexes)) 807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch matches = set() 817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for i, ltext in enumerate(self.lines): 827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if regex_c.search(ltext): 837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch matches.add(i+1) 847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return matches 857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _raw_parse(self): 877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Parse the source to find the interesting facts about its lines. 887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch A handful of member fields are updated. 907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Find lines which match an exclusion pattern. 937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if self.exclude: 947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.excluded = self.lines_matching(self.exclude) 957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Tokenize, to find excluded suites, to find docstrings, and to find 977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # multi-line statements. 987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch indent = 0 997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exclude_indent = 0 1007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch excluding = False 1017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch prev_toktype = token.INDENT 1027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch first_line = None 1037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch empty = True 1047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch tokgen = tokenize.generate_tokens(StringIO(self.text).readline) 1067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: 1077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if self.show_tokens: # pragma: no cover 1087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch print("%10s %5s %-20r %r" % ( 1097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch tokenize.tok_name.get(toktype, toktype), 1107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch nice_pair((slineno, elineno)), ttext, ltext 1117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch )) 1127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if toktype == token.INDENT: 1137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch indent += 1 1147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch elif toktype == token.DEDENT: 1157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch indent -= 1 1167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch elif toktype == token.NAME and ttext == 'class': 1177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Class definitions look like branches in the byte code, so 1187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # we need to exclude them. The simplest way is to note the 1197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # lines with the 'class' keyword. 1207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.classdefs.add(slineno) 1217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch elif toktype == token.OP and ttext == ':': 1227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if not excluding and elineno in self.excluded: 1237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Start excluding a suite. We trigger off of the colon 1247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # token so that the #pragma comment will be recognized on 1257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # the same line as the colon. 1267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exclude_indent = indent 1277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch excluding = True 1287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch elif toktype == token.STRING and prev_toktype == token.INDENT: 1297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Strings that are first on an indented line are docstrings. 1307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # (a trick from trace.py in the stdlib.) This works for 1317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # 99.9999% of cases. For the rest (!) see: 1327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # http://stackoverflow.com/questions/1769332/x/1769794#1769794 1337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for i in range(slineno, elineno+1): 1347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.docstrings.add(i) 1357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch elif toktype == token.NEWLINE: 1367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if first_line is not None and elineno != first_line: 1377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # We're at the end of a line, and we've ended on a 1387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # different line than the first line of the statement, 1397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # so record a multi-line range. 1407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch rng = (first_line, elineno) 1417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for l in range(first_line, elineno+1): 1427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.multiline[l] = rng 1437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch first_line = None 1447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if ttext.strip() and toktype != tokenize.COMMENT: 1467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # A non-whitespace token. 1477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch empty = False 1487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if first_line is None: 1497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The token is not whitespace, and is the first in a 1507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # statement. 1517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch first_line = slineno 1527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Check whether to end an excluded suite. 1537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if excluding and indent <= exclude_indent: 1547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch excluding = False 1557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if excluding: 1567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.excluded.add(elineno) 1577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch prev_toktype = toktype 1597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Find the starts of the executable statements. 1617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if not empty: 1627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.statement_starts.update(self.byte_parser._find_statements()) 1637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def first_line(self, line): 1657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Return the first line number of the statement including `line`.""" 1667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch rng = self.multiline.get(line) 1677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if rng: 1687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch first_line = rng[0] 1697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch else: 1707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch first_line = line 1717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return first_line 1727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def first_lines(self, lines, ignore=None): 1747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Map the line numbers in `lines` to the correct first line of the 1757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch statement. 1767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Skip any line mentioned in `ignore`. 1787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Returns a sorted list of the first lines. 1807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 1827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ignore = ignore or [] 1837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch lset = set() 1847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for l in lines: 1857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if l in ignore: 1867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch continue 1877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch new_l = self.first_line(l) 1887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if new_l not in ignore: 1897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch lset.add(new_l) 1907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return sorted(lset) 1917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def parse_source(self): 1937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Parse source text to find executable lines, excluded lines, etc. 1947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Return values are 1) a sorted list of executable line numbers, and 1967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2) a sorted list of excluded line numbers. 1977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 1987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Reported line numbers are normalized to the first line of multi-line 1997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch statements. 2007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 2027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self._raw_parse() 2037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch excluded_lines = self.first_lines(self.excluded) 2057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ignore = excluded_lines + list(self.docstrings) 2067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch lines = self.first_lines(self.statement_starts, ignore) 2077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return lines, excluded_lines 2097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def arcs(self): 2117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Get information about the arcs available in the code. 2127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Returns a sorted list of line number pairs. Line numbers have been 2147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch normalized to the first line of multiline statements. 2157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 2177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch all_arcs = [] 2187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for l1, l2 in self.byte_parser._all_arcs(): 2197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch fl1 = self.first_line(l1) 2207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch fl2 = self.first_line(l2) 2217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if fl1 != fl2: 2227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch all_arcs.append((fl1, fl2)) 2237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return sorted(all_arcs) 2247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch arcs = expensive(arcs) 2257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def exit_counts(self): 2277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Get a mapping from line numbers to count of exits from that line. 2287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Excluded lines are excluded. 2307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 2327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch excluded_lines = self.first_lines(self.excluded) 2337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exit_counts = {} 2347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for l1, l2 in self.arcs(): 2357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if l1 < 0: 2367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Don't ever report -1 as a line number 2377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch continue 2387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if l1 in excluded_lines: 2397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Don't report excluded lines as line numbers. 2407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch continue 2417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if l2 in excluded_lines: 2427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Arcs to excluded lines shouldn't count. 2437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch continue 2447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if l1 not in exit_counts: 2457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exit_counts[l1] = 0 2467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exit_counts[l1] += 1 2477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Class definitions have one extra exit, so remove one for each: 2497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for l in self.classdefs: 2507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Ensure key is there: classdefs can include excluded lines. 2517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if l in exit_counts: 2527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exit_counts[l] -= 1 2537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return exit_counts 2557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exit_counts = expensive(exit_counts) 2567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch## Opcodes that guide the ByteParser. 2597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochdef _opcode(name): 2617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Return the opcode by name from the opcode module.""" 2627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return opcode.opmap[name] 2637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochdef _opcode_set(*names): 2657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Return a set of opcodes by the names in `names`.""" 2667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch s = set() 2677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for name in names: 2687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch try: 2697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch s.add(_opcode(name)) 2707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch except KeyError: 2717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch pass 2727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return s 2737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that leave the code object. 2757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_CODE_END = _opcode_set('RETURN_VALUE') 2767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that unconditionally end the code chunk. 2787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_CHUNK_END = _opcode_set( 2797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS', 2807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 'BREAK_LOOP', 'CONTINUE_LOOP', 2817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ) 2827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that unconditionally begin a new code chunk. By starting new chunks 2847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# with unconditional jump instructions, we neatly deal with jumps to jumps 2857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# properly. 2867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD') 2877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that push a block on the block stack. 2897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_PUSH_BLOCK = _opcode_set( 2907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH' 2917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ) 2927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Block types for exception handling. 2947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY') 2957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that pop a block from the block stack. 2977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_POP_BLOCK = _opcode_set('POP_BLOCK') 2987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 2997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Opcodes that have a jump destination, but aren't really a jump. 3007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOPS_NO_JUMP = OPS_PUSH_BLOCK 3017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch# Individual opcodes we need below. 3037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_BREAK_LOOP = _opcode('BREAK_LOOP') 3047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_END_FINALLY = _opcode('END_FINALLY') 3057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_COMPARE_OP = _opcode('COMPARE_OP') 3067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochCOMPARE_EXCEPTION = 10 # just have to get this const from the code. 3077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_LOAD_CONST = _opcode('LOAD_CONST') 3087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen MurdochOP_RETURN_VALUE = _opcode('RETURN_VALUE') 3097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochclass ByteParser(object): 3127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Parse byte codes to understand the structure of code.""" 3137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def __init__(self, code=None, text=None, filename=None): 3157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if code: 3167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.code = code 3177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.text = text 3187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch else: 3197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if not text: 3207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch assert filename, "If no code or text, need a filename" 3217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch sourcef = open_source(filename) 3227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch try: 3237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch text = sourcef.read() 3247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch finally: 3257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch sourcef.close() 3267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.text = text 3277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch try: 3297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Python 2.3 and 2.4 don't like partial last lines, so be sure 3307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # the text ends nicely for them. 3317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.code = compile(text + '\n', filename, "exec") 3327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch except SyntaxError: 3337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch _, synerr, _ = sys.exc_info() 3347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch raise NotPython( 3357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch "Couldn't parse '%s' as Python source: '%s' at line %d" % 3367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch (filename, synerr.msg, synerr.lineno) 3377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ) 3387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Alternative Python implementations don't always provide all the 3407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # attributes on code objects that we need to do the analysis. 3417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']: 3427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if not hasattr(self.code, attr): 3437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch raise CoverageException( 3447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch "This implementation of Python doesn't support code " 3457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch "analysis.\n" 3467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch "Run coverage.py under CPython for this command." 3477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ) 3487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def child_parsers(self): 3507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Iterate over all the code objects nested within this one. 3517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch The iteration includes `self` as its first value. 3537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 3557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch children = CodeObjects(self.code) 3567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return [ByteParser(code=c, text=self.text) for c in children] 3577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Getting numbers from the lnotab value changed in Py3.0. 3597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if sys.version_info >= (3, 0): 3607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _lnotab_increments(self, lnotab): 3617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Return a list of ints from the lnotab bytes in 3.x""" 3627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return list(lnotab) 3637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch else: 3647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _lnotab_increments(self, lnotab): 3657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Return a list of ints from the lnotab string in 2.x""" 3667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return [ord(c) for c in lnotab] 3677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _bytes_lines(self): 3697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Map byte offsets to line numbers in `code`. 3707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Uses co_lnotab described in Python/compile.c to map byte offsets to 3727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch line numbers. Returns a list: [(b0, l0), (b1, l1), ...] 3737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 3757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Adapted from dis.py in the standard library. 3767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2]) 3777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch line_increments = self._lnotab_increments(self.code.co_lnotab[1::2]) 3787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_lines = [] 3807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch last_line_num = None 3817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch line_num = self.code.co_firstlineno 3827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_num = 0 3837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for byte_incr, line_incr in zip(byte_increments, line_increments): 3847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if byte_incr: 3857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if line_num != last_line_num: 3867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_lines.append((byte_num, line_num)) 3877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch last_line_num = line_num 3887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_num += byte_incr 3897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch line_num += line_incr 3907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if line_num != last_line_num: 3917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_lines.append((byte_num, line_num)) 3927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return bytes_lines 3937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _find_statements(self): 3957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Find the statements in `self.code`. 3967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 3977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Return a set of line numbers that start statements. Recurses into all 3987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch code objects reachable from `self.code`. 3997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 4017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch stmts = set() 4027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for bp in self.child_parsers(): 4037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Get all of the lineno information from this code. 4047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for _, l in bp._bytes_lines(): 4057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch stmts.add(l) 4067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return stmts 4077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _split_into_chunks(self): 4097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Split the code object into a list of `Chunk` objects. 4107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Each chunk is only entered at its first instruction, though there can 4127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch be many exits from a chunk. 4137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Returns a list of `Chunk` objects. 4157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 4177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The list of chunks so far, and the one we're working on. 4197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks = [] 4207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk = None 4217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_lines_map = dict(self._bytes_lines()) 4227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The block stack: loops and try blocks get pushed here for the 4247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # implicit jumps that can occur. 4257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Each entry is a tuple: (block type, destination) 4267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch block_stack = [] 4277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Some op codes are followed by branches that should be ignored. This 4297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # is a count of how many ignores are left. 4307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ignore_branch = 0 4317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # We have to handle the last two bytecodes specially. 4337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ult = penult = None 4347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for bc in ByteCodes(self.code.co_code): 4367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Maybe have to start a new chunk 4377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.offset in bytes_lines_map: 4387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Start a new chunk for each source line number. 4397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if chunk: 4407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(bc.offset) 4417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk = Chunk(bc.offset, bytes_lines_map[bc.offset]) 4427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks.append(chunk) 4437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch elif bc.op in OPS_CHUNK_BEGIN: 4447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Jumps deserve their own unnumbered chunk. This fixes 4457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # problems with jumps to jumps getting confused. 4467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if chunk: 4477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(bc.offset) 4487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk = Chunk(bc.offset) 4497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks.append(chunk) 4507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if not chunk: 4527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk = Chunk(bc.offset) 4537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks.append(chunk) 4547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Look at the opcode 4567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP: 4577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if ignore_branch: 4587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Someone earlier wanted us to ignore this branch. 4597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ignore_branch -= 1 4607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch else: 4617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The opcode has a jump, it's an exit for this chunk. 4627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(bc.jump_to) 4637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.op in OPS_CODE_END: 4657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The opcode can exit the code object. 4667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(-self.code.co_firstlineno) 4677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.op in OPS_PUSH_BLOCK: 4687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The opcode adds a block to the block_stack. 4697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch block_stack.append((bc.op, bc.jump_to)) 4707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.op in OPS_POP_BLOCK: 4717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The opcode pops a block from the block stack. 4727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch block_stack.pop() 4737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.op in OPS_CHUNK_END: 4747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # This opcode forces the end of the chunk. 4757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.op == OP_BREAK_LOOP: 4767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # A break is implicit: jump where the top of the 4777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # block_stack points. 4787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(block_stack[-1][1]) 4797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk = None 4807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.op == OP_END_FINALLY: 4817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if block_stack: 4827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # A break that goes through a finally will jump to whatever 4837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # block is on top of the stack. 4847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(block_stack[-1][1]) 4857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # For the finally clause we need to find the closest exception 4867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # block, and use its jump target as an exit. 4877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for iblock in range(len(block_stack)-1, -1, -1): 4887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS: 4897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(block_stack[iblock][1]) 4907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch break 4917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION: 4927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # This is an except clause. We want to overlook the next 4937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # branch, so that except's don't count as branches. 4947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ignore_branch += 1 4957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch penult = ult 4977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ult = bc 4987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 4997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if chunks: 5007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # The last two bytecodes could be a dummy "return None" that 5017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # shouldn't be counted as real code. Every Python code object seems 5027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # to end with a return, and a "return None" is inserted if there 5037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # isn't an explicit return in the source. 5047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if ult and penult: 5057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE: 5067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if self.code.co_consts[penult.arg] is None: 5077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # This is "return None", but is it dummy? A real line 5087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # would be a last chunk all by itself. 5097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if chunks[-1].byte != penult.offset: 5107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ex = -self.code.co_firstlineno 5117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Split the last chunk 5127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch last_chunk = chunks[-1] 5137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch last_chunk.exits.remove(ex) 5147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch last_chunk.exits.add(penult.offset) 5157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk = Chunk(penult.offset) 5167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunk.exits.add(ex) 5177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks.append(chunk) 5187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Give all the chunks a length. 5207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks[-1].length = bc.next_offset - chunks[-1].byte 5217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for i in range(len(chunks)-1): 5227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks[i].length = chunks[i+1].byte - chunks[i].byte 5237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return chunks 5257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _arcs(self): 5277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Find the executable arcs in the code. 5287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch Returns a set of pairs, (from,to). From and to are integer line 5307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch numbers. If from is < 0, then the arc is an entrance into the code 5317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch object. If to is < 0, the arc is an exit from the code object. 5327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 5347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks = self._split_into_chunks() 5357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # A map from byte offsets to chunks jumped into. 5377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_chunks = dict([(c.byte, c) for c in chunks]) 5387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Build a map from byte offsets to actual lines reached. 5407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_lines = {} 5417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_to_add = set([c.byte for c in chunks]) 5427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch while bytes_to_add: 5447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_to_add = bytes_to_add.pop() 5457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if byte_to_add in byte_lines or byte_to_add < 0: 5467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch continue 5477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Which lines does this chunk lead to? 5497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_considered = set() 5507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_to_consider = [byte_to_add] 5517757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch lines = set() 5527757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5537757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch while bytes_to_consider: 5547757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte = bytes_to_consider.pop() 5557757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_considered.add(byte) 5567757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5577757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Find chunk for byte 5587757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch try: 5597757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ch = byte_chunks[byte] 5607757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch except KeyError: 5617757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for ch in chunks: 5627757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if ch.byte <= byte < ch.byte+ch.length: 5637757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch break 5647757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch else: 5657757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # No chunk for this byte! 5667757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch raise Exception("Couldn't find chunk @ %d" % byte) 5677757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_chunks[byte] = ch 5687757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5697757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if ch.line: 5707757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch lines.add(ch.line) 5717757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch else: 5727757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for ex in ch.exits: 5737757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if ex < 0: 5747757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch lines.add(ex) 5757757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch elif ex not in bytes_considered: 5767757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_to_consider.append(ex) 5777757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5787757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch bytes_to_add.update(ch.exits) 5797757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5807757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch byte_lines[byte_to_add] = lines 5817757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5827757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch # Figure out for each chunk where the exits go. 5837757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch arcs = set() 5847757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for chunk in chunks: 5857757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if chunk.line: 5867757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for ex in chunk.exits: 5877757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if ex < 0: 5887757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exit_lines = [ex] 5897757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch else: 5907757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch exit_lines = byte_lines[ex] 5917757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for exit_line in exit_lines: 5927757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch if chunk.line != exit_line: 5937757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch arcs.add((chunk.line, exit_line)) 5947757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for line in byte_lines[0]: 5957757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch arcs.add((-1, line)) 5967757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5977757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return arcs 5987757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 5997757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _all_chunks(self): 6007757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Returns a list of `Chunk` objects for this code and its children. 6017757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6027757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch See `_split_into_chunks` for details. 6037757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6047757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 6057757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks = [] 6067757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for bp in self.child_parsers(): 6077757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch chunks.extend(bp._split_into_chunks()) 6087757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6097757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return chunks 6107757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6117757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def _all_arcs(self): 6127757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """Get the set of all arcs in this code object and its children. 6137757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6147757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch See `_arcs` for details. 6157757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6167757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 6177757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch arcs = set() 6187757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch for bp in self.child_parsers(): 6197757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch arcs.update(bp._arcs()) 6207757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6217757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return arcs 6227757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6237757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6247757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdochclass Chunk(object): 6257757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """A sequence of bytecodes with a single entrance. 6267757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6277757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch To analyze byte code, we have to divide it into chunks, sequences of byte 6287757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch codes such that each basic block has only one entrance, the first 6297757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch instruction in the block. 6307757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6317757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch This is almost the CS concept of `basic block`_, except that we're willing 6327757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch to have many exits from a chunk, and "basic block" is a more cumbersome 6337757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch term. 6347757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6357757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch .. _basic block: http://en.wikipedia.org/wiki/Basic_block 6367757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6377757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch An exit < 0 means the chunk can leave the code (return). The exit is 6387757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch the negative of the starting line number of the code block. 6397757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6407757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch """ 6417757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def __init__(self, byte, line=0): 6427757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.byte = byte 6437757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.line = line 6447757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.length = 0 6457757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.exits = set() 6467757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch 6477757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch def __repr__(self): 6487757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch return "<%d+%d @%d %r>" % ( 6497757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch self.byte, self.length, self.line, list(self.exits) 6507757ec2eadfa2dd8ac2aeed0a4399e9b07ec38cbBen Murdoch ) 651