195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek#!/usr/bin/env python
# A tool to parse ASTMatchers.h and update the documentation in
# ../LibASTMatchersReference.html automatically. Run from the
# directory in which this file is located to update the docs.
595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekimport collections
795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekimport re
895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekimport urllib2
995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
# Header that declares all AST matchers, relative to this script's directory.
MATCHERS_FILE = '../../include/clang/ASTMatchers/ASTMatchers.h'

# Every matcher is rendered as two table rows. The first one has the form:
#   result | name | argA
# The second one carries the documentation; it starts out hidden and is
# revealed via javascript when the user clicks the matcher name.
TD_TEMPLATE = (
    '\n'
    '<tr><td>%(result)s</td>'
    '<td class="name" onclick="toggle(\'%(id)s\')">'
    '<a name="%(id)sAnchor">%(name)s</a></td>'
    '<td>%(args)s</td></tr>\n'
    '<tr><td colspan="4" class="doc" id="%(id)s">'
    '<pre>%(comment)s</pre></td></tr>\n'
)

# The reference groups matchers into three categories; each map goes from a
# sort key to the rendered html rows of one matcher.
node_matchers = {}
narrowing_matchers = {}
traversal_matchers = {}

# A matcher usable on several node types is emitted once per node type, so
# every row needs its own id to drive the documentation pop-up. ids[name] is
# the number of rows handed out so far for the matcher called 'name'.
ids = collections.defaultdict(int)

# Maps a doxygen url to whether probing it succeeded, so that every url is
# requested at most once.
doxygen_probes = {}
3395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
def esc(text):
  """Escape any html in the given text and link known matcher node types.

  '&', '<' and '>' are replaced by their html entities. Afterwards every
  escaped 'Matcher<Name>' whose Name contains neither '*' nor '&' is turned
  into a link to the doxygen page of clang::Name, provided that page can be
  fetched. Probe results are remembered in doxygen_probes, so each url is
  requested at most once per run.

  Returns the escaped (and possibly linked) text.
  """
  # '&' must be escaped first, otherwise the entities produced below would
  # get escaped a second time.
  text = text.replace('&', '&amp;')
  text = text.replace('<', '&lt;')
  text = text.replace('>', '&gt;')

  def link_if_exists(m):
    """Returns the linked form of one match, or the match text unchanged."""
    name = m.group(1)
    url = 'http://clang.llvm.org/doxygen/classclang_1_1%s.html' % name
    if url not in doxygen_probes:
      # Imported lazily: only needed on a cache miss, and the module that
      # provides urlopen differs between Python 2 and Python 3.
      try:
        from urllib.request import urlopen
      except ImportError:
        from urllib2 import urlopen
      print('Probing %s...' % url)
      try:
        # Only reachability matters; close the response right away instead
        # of leaking the connection.
        urlopen(url).close()
        doxygen_probes[url] = True
      except Exception:
        # Best effort: any failure (404, no network, ...) just means "do not
        # link". KeyboardInterrupt/SystemExit are deliberately not caught.
        doxygen_probes[url] = False
    if doxygen_probes[url]:
      return r'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)
    else:
      return m.group(0)

  text = re.sub(
    r'Matcher&lt;([^\*&]+)&gt;', link_if_exists, text)
  return text
5695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
def extract_result_types(comment):
  """Extracts the list of result types announced at the end of a comment.

     A matcher comment may end with an annotation naming the nodes the
     matcher can match on. The annotation has the form:
       Usable as: Any Matcher | (Matcher<T1>[, Matcher<t2>[, ...]])

     Returns ['*'] for 'Any Matcher'. Otherwise the trailing Matcher<...>
     entries are peeled off from the end of the comment, so the types are
     returned in reverse order of appearance (['T2', 'T1'] for the example
     above); the list is empty if nothing follows 'Usable as:'.
     Returns None if the text preceding the trailing Matcher<...> entries
     does not end in 'Usable as:', i.e. no specification could be parsed.
  """
  if re.search(r'Usable as: Any Matcher[\s\n]*$', comment, re.S):
    return ['*']

  # Matches one Matcher<T> at the very end of the text; group 1 is everything
  # in front of it, group 2 is T.
  trailing_matcher = re.compile(r'^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$', re.S)

  collected = []
  remaining = comment
  found = trailing_matcher.match(remaining)
  while found:
    remaining, node_type = found.groups()
    collected.append(node_type)
    found = trailing_matcher.match(remaining)

  # Whatever is left must end in the 'Usable as:' marker for the collected
  # types to count as a specification.
  if re.search(r'Usable as:\s*$', remaining):
    return collected
  return None
8195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
def strip_doxygen(comment):
  r"""Returns the given comment with doxygen commands (\word) removed.

  A '\see' command is not dropped but rewritten to 'See also:'.
  """
  # (pattern, replacement, flags) triples, applied strictly in this order.
  rewrites = (
      # If there is only a doxygen keyword in the line, delete the whole line.
      (r'^\\[^\s]+\n', r'', re.M),
      # Change the \see prefix into "See also:".
      # FIXME: it would be better to turn this into a link to the target
      # instead.
      (r'\\see', r'See also:', 0),
      # Delete any remaining doxygen command and the following whitespace.
      (r'\\[^\s]+\s+', r'', 0),
  )
  for pattern, replacement, flags in rewrites:
    comment = re.sub(pattern, replacement, comment, flags=flags)
  return comment
9495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
def unify_arguments(args):
  """Simplifies an argument list to what matters to a reader of the docs.

  Drops 'internal::' qualifiers, 'const' and reference markers, and shows a
  stand-alone 'M' (optionally followed by one digit) as 'Matcher<*>'.
  """
  unqualified = args.replace('internal::', '')
  # Greedy on purpose: everything from 'const' up to the last '&' on the line
  # is kept, with that '&' turned into a space.
  without_const = re.sub(r'const\s+(.*)&', r'\1 ', unqualified)
  without_refs = without_const.replace('&', ' ')
  return re.sub(r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2', without_refs)
10295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
def add_matcher(result_type, name, args, comment, is_dyncast=False):
  """Renders one matcher and files it under one of our categories.

  result_type: node type the matcher is usable on.
  name: name of the matcher; 'id' is skipped.
  args: argument list as written in the source; it is simplified before use.
  comment: documentation of the matcher.
  is_dyncast: if true the matcher is filed as a node matcher; otherwise a
    heuristic decides between narrowing and traversal matcher.
  """
  # FIXME: Figure out whether we want to support the 'id' matcher.
  if name == 'id':
    return

  # Hand out the next free row id for this matcher name.
  row_number = ids[name]
  ids[name] = row_number + 1
  matcher_id = '%s%d' % (name, row_number)

  args = unify_arguments(args)
  escaped_result = esc('Matcher<%s>' % result_type)
  escaped_args = esc(args)
  escaped_comment = esc(strip_doxygen(comment))
  matcher_html = TD_TEMPLATE % {
    'id': matcher_id,
    'result': escaped_result,
    'name': name,
    'args': escaped_args,
    'comment': escaped_comment,
  }

  if is_dyncast:
    node_matchers[result_type + name] = matcher_html
    return

  # Heuristic: a matcher that takes other matchers as arguments (and is not a
  # node matcher) does traversal, except for the known narrowing matchers
  # that also take matchers as arguments. Everything else narrows.
  sort_key = result_type + name + escaped_args
  known_narrowing = ['allOf', 'anyOf', 'anything', 'unless']
  if 'Matcher<' in args and name not in known_narrowing:
    traversal_matchers[sort_key] = matcher_html
  else:
    narrowing_matchers[sort_key] = matcher_html
13095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
13195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekdef act_on_decl(declaration, comment, allowed_types):
13295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  """Parse the matcher out of the given declaration and comment.
13395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
13495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek     If 'allowed_types' is set, it contains a list of node types the matcher
13595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek     can match on, as extracted from the static type asserts in the matcher
13695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek     definition.
13795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  """
13895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  if declaration.strip():
13995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    # Node matchers are defined by writing:
14095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
14141df16e2a835f547b9384643e1804e75940e74ddManuel Klimek    m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
14241df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                       \s*([^\s,]+)\s*(?:,
14341df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                       \s*([^\s>]+)\s*)?>
14495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                       \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
14595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    if m:
14695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      result, inner, name = m.groups()
14741df16e2a835f547b9384643e1804e75940e74ddManuel Klimek      if not inner:
14841df16e2a835f547b9384643e1804e75940e74ddManuel Klimek        inner = result
14995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      add_matcher(result, name, 'Matcher<%s>...' % inner,
15095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                  comment, is_dyncast=True)
15195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      return
15295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
15395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    # Parse the various matcher definition macros.
15441df16e2a835f547b9384643e1804e75940e74ddManuel Klimek    m = re.match(""".*AST_TYPE_MATCHER\(
15541df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                       \s*([^\s,]+\s*),
15641df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                       \s*([^\s,]+\s*)
15741df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                     \)\s*;\s*$""", declaration, flags=re.X)
15841df16e2a835f547b9384643e1804e75940e74ddManuel Klimek    if m:
15941df16e2a835f547b9384643e1804e75940e74ddManuel Klimek      inner, name = m.groups()
16041df16e2a835f547b9384643e1804e75940e74ddManuel Klimek      add_matcher('Type', name, 'Matcher<%s>...' % inner,
16141df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                  comment, is_dyncast=True)
1622c4b2e42c5af83dc2138ecceab7f492fe9d6c555Manuel Klimek      # FIXME: re-enable once we have implemented casting on the TypeLoc
1632c4b2e42c5af83dc2138ecceab7f492fe9d6c555Manuel Klimek      # hierarchy.
1642c4b2e42c5af83dc2138ecceab7f492fe9d6c555Manuel Klimek      # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
1652c4b2e42c5af83dc2138ecceab7f492fe9d6c555Manuel Klimek      #             comment, is_dyncast=True)
16641df16e2a835f547b9384643e1804e75940e74ddManuel Klimek      return
16741df16e2a835f547b9384643e1804e75940e74ddManuel Klimek
16841df16e2a835f547b9384643e1804e75940e74ddManuel Klimek    m = re.match(""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER\(
16941df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                       \s*([^\s,]+\s*),
1703f84bb341bfb1312842b09db71d76bc3898ba247Samuel Benzaquen                       \s*(?:[^\s,]+\s*),
1713ea9e33ea25e0c2b12db56418ba3f994eb662c04Pirama Arumuga Nainar                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
17241df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                     \)\s*;\s*$""", declaration, flags=re.X)
17341df16e2a835f547b9384643e1804e75940e74ddManuel Klimek    if m:
17487d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar      loc, name, results = m.groups()[0:3]
1753f84bb341bfb1312842b09db71d76bc3898ba247Samuel Benzaquen      result_types = [r.strip() for r in results.split(',')]
1763f84bb341bfb1312842b09db71d76bc3898ba247Samuel Benzaquen
1773f84bb341bfb1312842b09db71d76bc3898ba247Samuel Benzaquen      comment_result_types = extract_result_types(comment)
1783f84bb341bfb1312842b09db71d76bc3898ba247Samuel Benzaquen      if (comment_result_types and
1793f84bb341bfb1312842b09db71d76bc3898ba247Samuel Benzaquen          sorted(result_types) != sorted(comment_result_types)):
1803f84bb341bfb1312842b09db71d76bc3898ba247Samuel Benzaquen        raise Exception('Inconsistent documentation for: %s' % name)
18141df16e2a835f547b9384643e1804e75940e74ddManuel Klimek      for result_type in result_types:
18241df16e2a835f547b9384643e1804e75940e74ddManuel Klimek        add_matcher(result_type, name, 'Matcher<Type>', comment)
18341df16e2a835f547b9384643e1804e75940e74ddManuel Klimek        if loc:
18441df16e2a835f547b9384643e1804e75940e74ddManuel Klimek          add_matcher('%sLoc' % result_type, '%sLoc' % name, 'Matcher<TypeLoc>',
18541df16e2a835f547b9384643e1804e75940e74ddManuel Klimek                      comment)
18641df16e2a835f547b9384643e1804e75940e74ddManuel Klimek      return
18741df16e2a835f547b9384643e1804e75940e74ddManuel Klimek
188ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen    m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
189ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                          \s*([^\s,]+)\s*,
1903ea9e33ea25e0c2b12db56418ba3f994eb662c04Pirama Arumuga Nainar                          \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
191ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                       (?:,\s*([^\s,]+)\s*
192ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                          ,\s*([^\s,]+)\s*)?
193ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                       (?:,\s*([^\s,]+)\s*
194ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                          ,\s*([^\s,]+)\s*)?
195ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                       (?:,\s*\d+\s*)?
196ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                      \)\s*{\s*$""", declaration, flags=re.X)
197ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen
198ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen    if m:
19987d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar      p, n, name, results = m.groups()[0:4]
20087d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar      args = m.groups()[4:]
201ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      result_types = [r.strip() for r in results.split(',')]
202ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      if allowed_types and allowed_types != result_types:
203ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen        raise Exception('Inconsistent documentation for: %s' % name)
204ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      if n not in ['', '2']:
205ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen        raise Exception('Cannot parse "%s"' % declaration)
206ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      args = ', '.join('%s %s' % (args[i], args[i+1])
207ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen                       for i in range(0, len(args), 2) if args[i])
208ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      for result_type in result_types:
209ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen        add_matcher(result_type, name, args, comment)
210ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      return
211ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen
212651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines    m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
213651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                       (?:\s*([^\s,]+)\s*,)?
214651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                          \s*([^\s,]+)\s*
215651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                       (?:,\s*([^\s,]+)\s*
216651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                          ,\s*([^\s,]+)\s*)?
217651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                       (?:,\s*([^\s,]+)\s*
218651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                          ,\s*([^\s,]+)\s*)?
219651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                       (?:,\s*\d+\s*)?
220651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                      \)\s*{\s*$""", declaration, flags=re.X)
221651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines    if m:
222651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      p, n, result, name = m.groups()[0:4]
223651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      args = m.groups()[4:]
224651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      if n not in ['', '2']:
225651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        raise Exception('Cannot parse "%s"' % declaration)
226651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      args = ', '.join('%s %s' % (args[i], args[i+1])
227651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                       for i in range(0, len(args), 2) if args[i])
228651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      add_matcher(result, name, args, comment)
229651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      return
230651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines
231ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen    m = re.match(r"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
23295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                       (?:\s*([^\s,]+)\s*,)?
23395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                          \s*([^\s,]+)\s*
2344967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar                       (?:,\s*([^,]+)\s*
23595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                          ,\s*([^\s,]+)\s*)?
23695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                       (?:,\s*([^\s,]+)\s*
23795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                          ,\s*([^\s,]+)\s*)?
238415514d5fbc2761b7f2938aa9112b079249820d6Manuel Klimek                       (?:,\s*\d+\s*)?
23995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                      \)\s*{\s*$""", declaration, flags=re.X)
24095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    if m:
241ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      p, n, result, name = m.groups()[0:4]
242ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen      args = m.groups()[4:]
24395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      if not result:
24495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        if not allowed_types:
24595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek          raise Exception('Did not find allowed result types for: %s' % name)
24695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        result_types = allowed_types
24795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      else:
24895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        result_types = [result]
24995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      if n not in ['', '2']:
25095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        raise Exception('Cannot parse "%s"' % declaration)
25195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      args = ', '.join('%s %s' % (args[i], args[i+1])
25295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                       for i in range(0, len(args), 2) if args[i])
25395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      for result_type in result_types:
25495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        add_matcher(result_type, name, args, comment)
25595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      return
25695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
257ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen    # Parse ArgumentAdapting matchers.
258ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen    m = re.match(
2596c1dc7870f457803a9b256ed868da82532be820bSamuel Benzaquen        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*(?:LLVM_ATTRIBUTE_UNUSED\s*)
2606c1dc7870f457803a9b256ed868da82532be820bSamuel Benzaquen              ([a-zA-Z]*)\s*=\s*{};$""",
261ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen        declaration, flags=re.X)
262ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen    if m:
263ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen      name = m.groups()[0]
264ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen      add_matcher('*', name, 'Matcher<*>', comment)
265ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen      return
266ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen
2674967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar    # Parse Variadic functions.
2684967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar    m = re.match(
2694967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar        r"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
2704967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar              ([a-zA-Z]*)\s*=\s*{.*};$""",
2714967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar        declaration, flags=re.X)
2724967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar    if m:
2734967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar      result, arg, name = m.groups()[:3]
2744967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar      add_matcher(result, name, '%s, ..., %s' % (arg, arg), comment)
2754967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar      return
2764967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar
277d36e46350b50907425bba0db1b3ddfb46cc1637fSamuel Benzaquen    # Parse Variadic operator matchers.
278d36e46350b50907425bba0db1b3ddfb46cc1637fSamuel Benzaquen    m = re.match(
279651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s>]+)\s*>\s*
280651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines              ([a-zA-Z]*)\s*=\s*{.*};$""",
281d36e46350b50907425bba0db1b3ddfb46cc1637fSamuel Benzaquen        declaration, flags=re.X)
282d36e46350b50907425bba0db1b3ddfb46cc1637fSamuel Benzaquen    if m:
283651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      min_args, max_args, name = m.groups()[:3]
284651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      if max_args == '1':
285651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        add_matcher('*', name, 'Matcher<*>', comment)
286651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        return
287651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      elif max_args == 'UINT_MAX':
288651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        add_matcher('*', name, 'Matcher<*>, ..., Matcher<*>', comment)
289651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines        return
290d36e46350b50907425bba0db1b3ddfb46cc1637fSamuel Benzaquen
291ee0da9520fe94f701240e9e1c97773ee412be102Samuel Benzaquen
29295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    # Parse free standing matcher functions, like:
29395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
29495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    m = re.match(r"""^\s*(.*)\s+
29595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                     ([^\s\(]+)\s*\(
29695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                     (.*)
29795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek                     \)\s*{""", declaration, re.X)
29895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    if m:
29995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      result, name, args = m.groups()
30095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      args = ', '.join(p.strip() for p in args.split(','))
30141df16e2a835f547b9384643e1804e75940e74ddManuel Klimek      m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result)
30295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      if m:
30341df16e2a835f547b9384643e1804e75940e74ddManuel Klimek        result_types = [m.group(2)]
30495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      else:
30595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        result_types = extract_result_types(comment)
30695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      if not result_types:
30795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        if not comment:
30895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek          # Only overloads don't have their own doxygen comments; ignore those.
30995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek          print 'Ignoring "%s"' % name
31095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        else:
31195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek          print 'Cannot determine result type for "%s"' % name
31295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      else:
31395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        for result_type in result_types:
31495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek          add_matcher(result_type, name, args, comment)
31595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    else:
31695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      print '*** Unparsable: "' + declaration + '" ***'
31795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
31895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekdef sort_table(matcher_type, matcher_map):
31995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  """Returns the sorted html table for the given row map."""
32095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  table = ''
32195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  for key in sorted(matcher_map.keys()):
32295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    table += matcher_map[key] + '\n'
32395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  return ('<!-- START_%(type)s_MATCHERS -->\n' +
32495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek          '%(table)s' +
32595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek          '<!--END_%(type)s_MATCHERS -->') % {
32695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    'type': matcher_type,
32795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    'table': table,
32895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  }
32995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
33795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekcomment = ''
33895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekdeclaration = ''
33995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekallowed_types = []
34095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekbody = False
34195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimekfor line in open(MATCHERS_FILE).read().splitlines():
34295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  if body:
34395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    if line.strip() and line[0] == '}':
34495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      if declaration:
34595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        act_on_decl(declaration, comment, allowed_types)
34695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        comment = ''
34795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        declaration = ''
34895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        allowed_types = []
34995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      body = False
35095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    else:
35195a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      m = re.search(r'is_base_of<([^,]+), NodeType>', line)
35295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      if m and m.group(1):
35395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        allowed_types += [m.group(1)]
35495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    continue
35595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  if line.strip() and line.lstrip()[0] == '/':
35695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    comment += re.sub(r'/+\s?', '', line) + '\n'
35795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek  else:
35895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    declaration += ' ' + line
35995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek    if ((not line.strip()) or
36095a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        line.rstrip()[-1] == ';' or
361d36e46350b50907425bba0db1b3ddfb46cc1637fSamuel Benzaquen        (line.rstrip()[-1] == '{' and line.rstrip()[-3:] != '= {')):
36295a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      if line.strip() and line.rstrip()[-1] == '{':
36395a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        body = True
36495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek      else:
36595a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        act_on_decl(declaration, comment, allowed_types)
36695a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        comment = ''
36795a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        declaration = ''
36895a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek        allowed_types = []
36995a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
# Render one html table per matcher category.
node_matcher_table = sort_table('DECL', node_matchers)
narrowing_matcher_table = sort_table('NARROWING', narrowing_matchers)
traversal_matcher_table = sort_table('TRAVERSAL', traversal_matchers)

# Read the current reference page; 'with' closes the handle before the file
# is reopened for writing below.
with open('../LibASTMatchersReference.html') as reference_file:
  reference = reference_file.read()

# Replace everything between each pair of START/END markers with the freshly
# generated table. re.S lets '.' span newlines; '.*' is greedy, so the match
# runs from the first START marker to the last matching END marker.
# NOTE(review): re.sub processes backslash escapes in the replacement string,
# so a backslash inside a generated table would be interpreted rather than
# copied literally -- confirm the tables never contain one.
reference = re.sub(r'<!-- START_DECL_MATCHERS.*END_DECL_MATCHERS -->',
                   node_matcher_table, reference, flags=re.S)
reference = re.sub(r'<!-- START_NARROWING_MATCHERS.*END_NARROWING_MATCHERS -->',
                   narrowing_matcher_table, reference, flags=re.S)
reference = re.sub(r'<!-- START_TRAVERSAL_MATCHERS.*END_TRAVERSAL_MATCHERS -->',
                   traversal_matcher_table, reference, flags=re.S)

# Write the updated page back in place.
with open('../LibASTMatchersReference.html', 'wb') as output:
  output.write(reference)
38495a58d2f6ef9149dd50d679a7ee70d0685c38d27Manuel Klimek
385