1# Copyright (c) 2011 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Crocodile source scanners."""
6
7
8import re
9
10
class Scanner(object):
  """Generic source scanner.

  Walks source text one line at a time, using a token regular expression to
  track string literals and comments, and reports the lines which still
  contain text once comments have been removed.

  Attributes:
    re_token: Compiled regular expression matching every token of interest
        (comment markers and string delimiters).  The token text is read from
        the first capturing group when the pattern defines one, otherwise
        from the whole match.
    comment_to_eol: List of tokens which start a comment that runs to the end
        of the line.
    comment_start: Token which opens a multi-line comment, or None.
    comment_end: Token which closes a multi-line comment, or None.
  """

  def __init__(self):
    """Constructor.

    Sets up a scanner whose only token is '#', treated as a comment running
    to end of line.  Subclasses overwrite these attributes.
    """

    self.re_token = re.compile('#')
    self.comment_to_eol = ['#']
    self.comment_start = None
    self.comment_end = None

  def ScanLines(self, lines):
    """Scans the lines for executable statements.

    Args:
      lines: Iterator returning source lines.

    Returns:
      An array of 1-based line numbers which may be executable: lines which
      are not blank once comments are removed, plus lines which begin inside
      a string literal carried over from an earlier line.
    """
    exe_lines = []

    # Delimiter of the string literal currently open, or None.
    in_string = None
    # End token of the multi-line comment currently open, or None.
    in_comment = None
    # Offset in the current line where the open multi-line comment begins.
    comment_index = None

    # Use the first capturing group when the pattern has one.  A pattern with
    # no groups (such as the default '#') falls back to the whole match rather
    # than raising IndexError.
    token_group = 1 if self.re_token.groups else 0

    for lineno, line in enumerate(lines, 1):
      in_string_at_start = in_string

      # finditer() keeps scanning the original text even though 'line' is
      # rebound below.  Comment blanking preserves the line length, so match
      # offsets remain valid; truncation is always followed by a break.
      for t in self.re_token.finditer(line):
        # A pattern may absorb surrounding whitespace (for example '^\s*#'),
        # so strip it before comparing against the configured tokens.
        tokenstr = (t.group(token_group) or '').strip()
        if not tokenstr:
          continue

        if in_comment:
          # Inside a multi-line comment, so look for end token
          if tokenstr == in_comment:
            in_comment = None
            # Replace comment with spaces
            line = (line[:comment_index]
                    + ' ' * (t.end(0) - comment_index)
                    + line[t.end(0):])

        elif in_string:
          # Inside a string, so look for end token
          if tokenstr == in_string:
            in_string = None

        elif tokenstr in self.comment_to_eol:
          # Single-line comment, so truncate line at start of token
          line = line[:t.start(0)]
          break

        elif tokenstr == self.comment_start:
          # Multi-line comment start - end token is comment_end
          in_comment = self.comment_end
          comment_index = t.start(0)

        else:
          # Starting a string - end token is same as start
          in_string = tokenstr

      # If still in comment at end of line, remove comment
      if in_comment:
        line = line[:comment_index]
        # Next line, delete from the beginning
        comment_index = 0

      # If line-sans-comments is not empty, claim it may be executable.  A
      # line which started inside a string is always claimed.
      if line.strip() or in_string_at_start:
        exe_lines.append(lineno)

    # Return executable lines
    return exe_lines

  def Scan(self, filename):
    """Reads the file and scans its lines.

    Args:
      filename: Path to file to scan.

    Returns:
      An array of line numbers which are executable.

    Raises:
      IOError: (OSError on Python 3) If the file cannot be opened or read.
    """

    # TODO: All manner of error checking
    # The context manager closes the file even if scanning raises.
    with open(filename, 'rt') as f:
      return self.ScanLines(f)
104
105
class PythonScanner(Scanner):
  """Scanner configured for Python source files."""

  def __init__(self):
    """Installs the Python token pattern and comment markers."""
    super(PythonScanner, self).__init__()

    # Only '#' comments, which run to end of line; no multi-line comment
    # delimiters are configured.
    self.comment_start = None
    self.comment_end = None
    self.comment_to_eol = ['#']

    # Tokens are '#', either triple-quote form, or a single/double quote
    # which is not preceded by a lone backslash.
    #
    # TODO: This breaks for strings ending in more than 2 backslashes.  Need
    # a pattern which counts only an odd number of backslashes, so the last
    # one thus escapes the quote.
    self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')
120
121
class CppScanner(Scanner):
  """Scanner configured for C, C++, ObjC and ObjC++ source files."""

  def __init__(self):
    """Installs the C-family token pattern and comment markers."""
    super(CppScanner, self).__init__()

    # Multi-line comments are delimited by /* and */.
    self.comment_start = '/*'
    self.comment_end = '*/'

    # '//' comments run to end of line.  A '#' at the beginning of a line is
    # also treated as a comment, so that preprocessor definitions are ignored.
    self.comment_to_eol = ['//', '#']

    # Tokens are a line-leading '#' (the match also takes in any whitespace
    # before it), '//', '/*', '*/', or a single/double quote which is not
    # preceded by a lone backslash.
    #
    # TODO: This breaks for strings ending in more than 2 backslashes.  Need
    # a pattern which counts only an odd number of backslashes, so the last
    # one thus escapes the quote.
    #
    # TODO: Treat '\' at EOL as a token, and handle it as continuing the
    # previous line.  That is, if in a comment-to-eol, this line is a comment
    # too.
    self.re_token = re.compile(r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')
144
145
def ScanFile(filename, language):
  """Finds the executable lines of a source file.

  Picks the scanner matching the language name and runs it over the file.

  Args:
    filename: Path to file to scan.
    language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++').
        The comparison is exact, including case.

  Returns:
    A list of executable lines, or an empty list if the file was not a handled
        language.
  """

  if language == 'python':
    scanner = PythonScanner()
  elif language in ('C', 'C++', 'ObjC', 'ObjC++'):
    scanner = CppScanner()
  else:
    # Something we don't handle; the file is never opened
    return []

  return scanner.Scan(filename)
165