#!/usr/bin/env python
# A tool to parse ASTMatchers.h and update the documentation in
# ../LibASTMatchersReference.html automatically. Run from the
# directory in which this file is located to update the docs.

import collections
import re
try:
  import urllib2
except ImportError:
  # Python 3 moved urlopen into urllib.request; keep the historical name so
  # that existing 'urllib2.urlopen' call sites keep working.
  import urllib.request as urllib2

# Location of the matcher definitions, relative to the directory this script
# is run from.
MATCHERS_FILE = '../../include/clang/ASTMatchers/ASTMatchers.h'

# Each matcher is documented in one row of the form:
#   result | name | argA
# The subsequent row contains the documentation and is hidden by default,
# becoming visible via javascript when the user clicks the matcher name.
TD_TEMPLATE = """
<tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>
<tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>
"""

# We categorize the matchers into these three categories in the reference.
# Each map goes from a sort key (result type followed by the matcher name) to
# the html rows rendered for that matcher.
node_matchers = {}
narrowing_matchers = {}
traversal_matchers = {}

# We output multiple rows per matcher if the matcher can be used on multiple
# node types. Thus, we need a new id per row to control the documentation
# pop-up. ids[name] is the numeric suffix the next row for 'name' will get.
ids = collections.defaultdict(int)

# Cache for doxygen urls we have already verified. Maps every probed url to
# True if it could be fetched and to False if fetching it failed.
doxygen_probes = {}

def esc(text):
  """Escape any html in the given text and link known matcher types.

  After '&', '<' and '>' have been escaped, every 'Matcher<T>' (where T
  contains neither '*' nor '&') is looked up in the online clang doxygen
  documentation. If the class page for T can be fetched, T is turned into a
  link to that page; otherwise the escaped text is left untouched. Probe
  results are remembered in doxygen_probes, so each url is fetched at most
  once per run.

  Args:
    text: The raw text to escape.

  Returns:
    The escaped text, with doxygen links inserted where a page was found.
  """
  # '&' has to be handled first; otherwise the entities produced for '<' and
  # '>' would get escaped a second time.
  text = text.replace('&', '&amp;')
  text = text.replace('<', '&lt;')
  text = text.replace('>', '&gt;')

  def link_if_exists(m):
    """Returns the linked form of the match if its doxygen page exists."""
    name = m.group(1)
    url = 'http://clang.llvm.org/doxygen/classclang_1_1%s.html' % name
    if url not in doxygen_probes:
      print('Probing %s...' % url)
      try:
        # Only reachability matters; close the response right away.
        urllib2.urlopen(url).close()
      except Exception:
        # Best effort: any failure to fetch the page just means "no link".
        # Unlike a bare 'except', this lets Ctrl-C abort the run.
        doxygen_probes[url] = False
      else:
        doxygen_probes[url] = True
    if doxygen_probes[url]:
      return 'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)
    return m.group(0)

  return re.sub(r'Matcher&lt;([^\*&]+)&gt;', link_if_exists, text)

def extract_result_types(comment):
  """Extracts a list of result types from the given comment.

  We allow annotations in the comment of the matcher to specify what
  nodes a matcher can match on. Those comments have the form:
    Usable as: Any Matcher | (Matcher<T1>[, Matcher<T2>[, ...]])
  The annotation has to be the last thing in the comment.

  Args:
    comment: The documentation comment of a matcher.

  Returns:
    ['*'] in case of 'Any Matcher'. Otherwise the listed types in reverse
    order of appearance, e.g. ['T2', 'T1'] for
    'Usable as: Matcher<T1>, Matcher<T2>'; this is the empty list if
    'Usable as:' is not followed by any matcher. None if no 'Usable as'
    specification could be parsed.
  """
  if re.search(r'Usable as: Any Matcher\s*$', comment, re.S):
    return ['*']
  result_types = []
  while True:
    # Peel the last 'Matcher<T>' off the end of the comment; the greedy
    # prefix group keeps everything in front of it for the next round.
    m = re.match(r'^(.*)Matcher<([^>]+)>\s*,?\s*$', comment, re.S)
    if not m:
      break
    result_types.append(m.group(2))
    comment = m.group(1)
  # The matcher list only counts if it was introduced by 'Usable as:'.
  if re.search(r'Usable as:\s*$', comment):
    return result_types
  return None

def strip_doxygen(comment):
  r"""Returns the given comment without \-escaped words.

  Args:
    comment: The comment text, possibly containing doxygen commands such as
      \brief or \code.

  Returns:
    The comment with all doxygen commands removed.
  """
  # If there is only a doxygen keyword in the line, delete the whole line.
  comment = re.sub(r'^\\[^\s]+\n', r'', comment, flags=re.M)
  # Delete the doxygen command and the following whitespace.
  return re.sub(r'\\[^\s]+\s+', r'', comment)

def unify_arguments(args):
  """Gets rid of anything the user doesn't care about in the argument list.

  Args:
    args: The argument list of a matcher as written in the source.

  Returns:
    The argument list without 'internal::' qualifiers and 'const', with
    every '&' replaced by a space, and with the template parameter names
    M, M0..M9 spelled as 'Matcher<*>'.
  """
  args = args.replace('internal::', '')
  args = re.sub(r'const\s+', r'', args)
  args = args.replace('&', ' ')
  # A stand-alone M or M<digit> is a template parameter standing for an
  # arbitrary matcher type.
  return re.sub(r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2', args)

def add_matcher(result_type, name, args, comment, is_dyncast=False):
  """Adds a matcher to one of our categories.

  Renders the html rows for the matcher and stores them under the key
  result_type + name in node_matchers, narrowing_matchers or
  traversal_matchers. A later call with the same key replaces the rows
  stored by an earlier one.

  Args:
    result_type: The node type the matcher can be used on.
    name: The name of the matcher.
    args: The argument list of the matcher as written in the source.
    comment: The documentation comment of the matcher.
    is_dyncast: True if the matcher is a node matcher.
  """
  if name == 'id':
    # FIXME: Figure out whether we want to support the 'id' matcher.
    return
  # Every row gets its own id, even if the matcher name repeats.
  matcher_id = '%s%d' % (name, ids[name])
  ids[name] += 1
  args = unify_arguments(args)
  matcher_html = TD_TEMPLATE % {
    'result': esc('Matcher<%s>' % result_type),
    'name': name,
    'args': esc(args),
    'comment': esc(strip_doxygen(comment)),
    'id': matcher_id,
  }
  key = result_type + name
  if is_dyncast:
    node_matchers[key] = matcher_html
  # Use a heuristic to figure out whether a matcher is a narrowing or
  # traversal matcher. By default, matchers that take other matchers as
  # arguments (and are not node matchers) do traversal. We specifically
  # exclude known narrowing matchers that also take other matchers as
  # arguments.
  elif ('Matcher<' not in args or
        name in ('allOf', 'anyOf', 'anything', 'unless')):
    narrowing_matchers[key] = matcher_html
  else:
    traversal_matchers[key] = matcher_html

def act_on_decl(declaration, comment, allowed_types):
  """Parse the matcher out of the given declaration and comment.

  Three kinds of declarations are recognized: VariadicDynCastAllOfMatcher
  variables (node matchers), the AST_[POLYMORPHIC_]MATCHER[_P[2]] macros and
  free standing matcher functions. Every matcher found is registered via
  add_matcher, once per result type. Blank declarations are ignored; for
  anything else a diagnostic is printed.

  Args:
    declaration: The source text of the declaration, joined into one line.
    comment: The documentation comment preceding the declaration.
    allowed_types: If set, a list of node types the matcher can match on, as
      extracted from the static type asserts in the matcher definition.

  Raises:
    Exception: If a matcher macro names no result type and allowed_types is
      empty, or if the macro takes an unsupported number of parameters.
  """
  if not declaration.strip():
    return

  # Node matchers are defined by writing:
  #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
  m = re.match(r""".*VariadicDynCastAllOfMatcher\s*<
                     \s*([^\s,]+)\s*,
                     \s*([^\s>]+)\s*>
                     \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
  if m:
    result, inner, name = m.groups()
    add_matcher(result, name, 'Matcher<%s>...' % inner,
                comment, is_dyncast=True)
    return

  # Parse the various matcher definition macros.
  m = re.match(r"""^\s*AST_(POLYMORPHIC_)?MATCHER(_P)?(.?)\(
                     (?:\s*([^\s,]+)\s*,)?
                        \s*([^\s,]+)\s*
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                    \)\s*{\s*$""", declaration, flags=re.X)
  if m:
    # Groups: polymorphic marker, '_P' marker, parameter count suffix, result
    # type, matcher name, followed by up to two (type, name) parameter pairs.
    n, result, name = m.groups()[2:5]
    params = m.groups()[5:]
    if not result:
      if not allowed_types:
        raise Exception('Did not find allowed result types for: %s' % name)
      result_types = allowed_types
    else:
      result_types = [result]
    if n not in ['', '2']:
      raise Exception('Cannot parse "%s"' % declaration)
    # Parameter pairs that are absent from the macro show up as None.
    args = ', '.join('%s %s' % (param_type, param_name)
                     for param_type, param_name
                     in zip(params[0::2], params[1::2]) if param_type)
    for result_type in result_types:
      add_matcher(result_type, name, args, comment)
    return

  # Parse free standing matcher functions, like:
  #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
  m = re.match(r"""^\s*(.*)\s+
                   ([^\s\(]+)\s*\(
                   (.*)
                   \)\s*{""", declaration, re.X)
  if not m:
    print('*** Unparsable: "' + declaration + '" ***')
    return

  result, name, args = m.groups()
  args = ', '.join(arg.strip() for arg in args.split(','))
  result_match = re.match(r'.*\s+internal::Matcher<([^>]+)>$', result)
  if result_match:
    result_types = [result_match.group(1)]
  else:
    # Without an explicit return type we rely on the 'Usable as:' annotation.
    result_types = extract_result_types(comment)
  if not result_types:
    if not comment:
      # Only overloads don't have their own doxygen comments; ignore those.
      print('Ignoring "%s"' % name)
    else:
      print('Cannot determine result type for "%s"' % name)
    return
  for result_type in result_types:
    add_matcher(result_type, name, args, comment)

def sort_table(matcher_type, matcher_map):
  """Returns the sorted html table for the given row map.

  Args:
    matcher_type: The section name used in the START_/END_ marker comments,
      e.g. 'DECL'.
    matcher_map: Maps sort keys to the html rows of a matcher.

  Returns:
    The rows of matcher_map ordered by key, each followed by a newline and
    all of them enclosed in the marker comments of the section.
  """
  table = ''.join(matcher_map[key] + '\n' for key in sorted(matcher_map))
  return ('<!-- START_%(type)s_MATCHERS -->\n' +
          '%(table)s' +
          '<!--END_%(type)s_MATCHERS -->') % {
    'type': matcher_type,
    'table': table,
  }

# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
comment = ''
declaration = ''
allowed_types = []
body = False
with open(MATCHERS_FILE) as matchers_file:
  matcher_lines = matchers_file.read().splitlines()
for line in matcher_lines:
  if body:
    # A closing brace in the first column ends the definition.
    if line.startswith('}'):
      if declaration:
        act_on_decl(declaration, comment, allowed_types)
        comment = ''
        declaration = ''
        allowed_types = []
      body = False
    else:
      m = re.search(r'is_base_of<([^,]+), NodeType>', line)
      if m:
        allowed_types.append(m.group(1))
    continue
  if line.lstrip().startswith('/'):
    # Strip only the leading comment marker; slashes further into the line
    # (e.g. a trailing '// ...' in a code example) belong to the text.
    comment += re.sub(r'/+\s?', '', line, count=1) + '\n'
    continue
  declaration += ' ' + line
  stripped = line.rstrip()
  if stripped.endswith('{'):
    body = True
  elif not stripped or stripped.endswith(';'):
    # A blank line or a ';' ends a declaration that has no body.
    act_on_decl(declaration, comment, allowed_types)
    comment = ''
    declaration = ''
    allowed_types = []

node_matcher_table = sort_table('DECL', node_matchers)
narrowing_matcher_table = sort_table('NARROWING', narrowing_matchers)
traversal_matcher_table = sort_table('TRAVERSAL', traversal_matchers)

def _replace_section(html, section, table):
  """Returns html with the marked section of the given name set to table."""
  pattern = r'<!-- START_%s_MATCHERS.*END_%s_MATCHERS -->' % (section, section)
  # A function as replacement inserts the table verbatim: neither '%' nor
  # backslashes in the table or in the rest of the document get interpreted.
  html, count = re.subn(pattern, lambda _: table, html, flags=re.S)
  if not count:
    raise Exception('Did not find the %s matcher section markers' % section)
  return html

with open('../LibASTMatchersReference.html') as reference_file:
  reference = reference_file.read()
reference = _replace_section(reference, 'DECL', node_matcher_table)
reference = _replace_section(reference, 'NARROWING', narrowing_matcher_table)
reference = _replace_section(reference, 'TRAVERSAL', traversal_matcher_table)

with open('../LibASTMatchersReference.html', 'w') as output:
  output.write(reference)