#!/usr/bin/env python
# A tool to parse ASTMatchers.h and update the documentation in
# ../LibASTMatchersReference.html automatically. Run from the
# directory in which this file is located to update the docs.

import collections
import re
try:
    import urllib2
except ImportError:
    # Python 3: urllib2 was merged into urllib.request, which offers the same
    # urlopen() entry point, so it is bound under the old name.
    import urllib.request as urllib2

MATCHERS_FILE = '../../include/clang/ASTMatchers/ASTMatchers.h'

# Each matcher is documented in one row of the form:
#   result | name | argA
# The subsequent row contains the documentation and is hidden by default,
# becoming visible via javascript when the user clicks the matcher name.
TD_TEMPLATE = """
<tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>
<tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>
"""

# We categorize the matchers into these three categories in the reference.
# Each dict maps a sort key to the rendered html rows of one matcher.
node_matchers = {}
narrowing_matchers = {}
traversal_matchers = {}

# We output multiple rows per matcher if the matcher can be used on multiple
# node types. Thus, we need a new id per row to control the documentation
# pop-up. ids[name] keeps track of those ids (it holds the next free index
# for that matcher name, starting at 0).
ids = collections.defaultdict(int)

# Cache for doxygen urls we have already verified.
doxygen_probes = {}


def esc(text):
    """Escape any html in the given text.

    '&', '<' and '>' are replaced by their html entities. Afterwards every
    (escaped) 'Matcher<Name>' whose doxygen class page could be fetched is
    turned into a link to that page. Probe results are remembered in
    doxygen_probes so each url is requested at most once.

    Returns the escaped text.
    """
    # '&' has to be handled first; otherwise the ampersands introduced by the
    # '<' and '>' entities would be escaped a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')

    def link_if_exists(m):
        name = m.group(1)
        url = 'http://clang.llvm.org/doxygen/classclang_1_1%s.html' % name
        if url not in doxygen_probes:
            print('Probing %s...' % url)
            try:
                urllib2.urlopen(url).close()
                doxygen_probes[url] = True
            except Exception:
                # Probing is best effort: any failure just means "no link".
                # Unlike a bare except this lets Ctrl-C abort the run.
                doxygen_probes[url] = False
        if doxygen_probes[url]:
            return r'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)
        else:
            return m.group(0)

    # The text is already escaped at this point, so the angle brackets of
    # 'Matcher<...>' show up as entities.
    text = re.sub(r'Matcher&lt;([^\*&]+)&gt;', link_if_exists, text)
    return text


def extract_result_types(comment):
    """Extracts a list of result types from the given comment.

    We allow annotations in the comment of the matcher to specify what
    nodes a matcher can match on. Those comments have the form:
      Usable as: Any Matcher | (Matcher<T1>[, Matcher<t2>[, ...]])

    Returns ['*'] in case of 'Any Matcher', or the listed types. The types
    are collected from the end of the comment towards its start, so they
    come out in reverse order of appearance (['T2', 'T1'] for the example).
    Returns None if no 'Usable as' specification could be parsed.
    """
    result_types = []
    m = re.search(r'Usable as: Any Matcher[\s\n]*$', comment, re.S)
    if m:
        return ['*']
    while True:
        # Peel the trailing 'Matcher<T>' off the comment, one per iteration.
        m = re.match(r'^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$', comment, re.S)
        if not m:
            if re.search(r'Usable as:\s*$', comment):
                return result_types
            else:
                return None
        result_types += [m.group(2)]
        comment = m.group(1)


def strip_doxygen(comment):
    """Returns the given comment without \\-escaped words."""
    # If there is only a doxygen keyword in the line, delete the whole line.
    comment = re.sub(r'^\\[^\s]+\n', r'', comment, flags=re.M)

    # If there is a doxygen \see command, change the \see prefix into
    # "See also:".
    # FIXME: it would be better to turn this into a link to the target instead.
    comment = re.sub(r'\\see', r'See also:', comment)

    # Delete the doxygen command and the following whitespace.
    comment = re.sub(r'\\[^\s]+\s+', r'', comment)
    return comment


def unify_arguments(args):
    """Gets rid of anything the user doesn't care about in the argument list.

    Drops 'internal::' qualifiers, const-reference decoration and stray '&',
    and rewrites the template parameter names M / M<digit> to 'Matcher<*>'.
    """
    args = re.sub(r'internal::', r'', args)
    args = re.sub(r'const\s+(.*)&', r'\1 ', args)
    args = re.sub(r'&', r' ', args)
    args = re.sub(r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2', args)
    return args


def add_matcher(result_type, name, args, comment, is_dyncast=False):
    """Adds a matcher to one of our categories.

    Renders the html rows for the matcher and stores them in node_matchers
    (if is_dyncast is set), narrowing_matchers or traversal_matchers. The
    matcher named 'id' is skipped.
    """
    if name == 'id':
        # FIXME: Figure out whether we want to support the 'id' matcher.
        return
    matcher_id = '%s%d' % (name, ids[name])
    ids[name] += 1
    args = unify_arguments(args)
    escaped_args = esc(args)
    matcher_html = TD_TEMPLATE % {
        'result': esc('Matcher<%s>' % result_type),
        'name': name,
        'args': escaped_args,
        'comment': esc(strip_doxygen(comment)),
        'id': matcher_id,
    }
    if is_dyncast:
        node_matchers[result_type + name] = matcher_html
    # Use a heuristic to figure out whether a matcher is a narrowing or
    # traversal matcher. By default, matchers that take other matchers as
    # arguments (and are not node matchers) do traversal. We specifically
    # exclude known narrowing matchers that also take other matchers as
    # arguments.
    elif ('Matcher<' not in args or
          name in ['allOf', 'anyOf', 'anything', 'unless']):
        narrowing_matchers[result_type + name + escaped_args] = matcher_html
    else:
        traversal_matchers[result_type + name + escaped_args] = matcher_html


def _format_macro_args(args):
    """Joins flat (type, name, type, name, ...) captures as 'type name' pairs.

    Pairs whose type capture is empty or None (optional regex groups that did
    not participate in the match) are left out.
    """
    return ', '.join('%s %s' % (args[i], args[i + 1])
                     for i in range(0, len(args), 2) if args[i])


def act_on_decl(declaration, comment, allowed_types):
    """Parse the matcher out of the given declaration and comment.

    If 'allowed_types' is set, it contains a list of node types the matcher
    can match on, as extracted from the static type asserts in the matcher
    definition.

    The declaration is tried against the known matcher definition forms in
    turn; the first form that fits registers the matcher via add_matcher().
    Blank declarations are ignored, unrecognized ones are reported on stdout.

    Raises Exception if the documented and declared result types disagree,
    if a macro arity suffix is not supported, or if an AST_MATCHER without an
    explicit type has no allowed_types.
    """
    if not declaration.strip():
        return

    # Node matchers are defined by writing:
    #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
    m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
                       \s*([^\s,]+)\s*(?:,
                       \s*([^\s>]+)\s*)?>
                       \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
    if m:
        result, inner, name = m.groups()
        if not inner:
            inner = result
        add_matcher(result, name, 'Matcher<%s>...' % inner,
                    comment, is_dyncast=True)
        return

    # Parse the various matcher definition macros.
    m = re.match(r""".*AST_TYPE_MATCHER\(
                       \s*([^\s,]+\s*),
                       \s*([^\s,]+\s*)
                     \)\s*;\s*$""", declaration, flags=re.X)
    if m:
        inner, name = m.groups()
        add_matcher('Type', name, 'Matcher<%s>...' % inner,
                    comment, is_dyncast=True)
        # FIXME: re-enable once we have implemented casting on the TypeLoc
        # hierarchy.
        # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
        #             comment, is_dyncast=True)
        return

    m = re.match(r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER\(
                       \s*([^\s,]+\s*),
                       \s*(?:[^\s,]+\s*),
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                     \)\s*;\s*$""", declaration, flags=re.X)
    if m:
        loc, name, results = m.groups()[0:3]
        result_types = [r.strip() for r in results.split(',')]

        comment_result_types = extract_result_types(comment)
        if (comment_result_types and
                sorted(result_types) != sorted(comment_result_types)):
            raise Exception('Inconsistent documentation for: %s' % name)
        for result_type in result_types:
            add_matcher(result_type, name, 'Matcher<Type>', comment)
            if loc:
                add_matcher('%sLoc' % result_type, '%sLoc' % name,
                            'Matcher<TypeLoc>', comment)
        return

    m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                       \s*([^\s,]+)\s*,
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                     \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _, n, name, results = m.groups()[0:4]
        args = m.groups()[4:]
        result_types = [r.strip() for r in results.split(',')]
        if allowed_types and allowed_types != result_types:
            raise Exception('Inconsistent documentation for: %s' % name)
        if n not in ['', '2']:
            raise Exception('Cannot parse "%s"' % declaration)
        args = _format_macro_args(args)
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                       \s*([^\s,]+)\s*
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                     \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        if n not in ['', '2']:
            raise Exception('Cannot parse "%s"' % declaration)
        args = _format_macro_args(args)
        add_matcher(result, name, args, comment)
        return

    m = re.match(r"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                       \s*([^\s,]+)\s*
                       (?:,\s*([^,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                     \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        if not result:
            # No explicit node type in the macro: fall back to the types
            # collected from the static asserts in the matcher body.
            if not allowed_types:
                raise Exception(
                    'Did not find allowed result types for: %s' % name)
            result_types = allowed_types
        else:
            result_types = [result]
        if n not in ['', '2']:
            raise Exception('Cannot parse "%s"' % declaration)
        args = _format_macro_args(args)
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    # Parse ArgumentAdapting matchers.
    m = re.match(
        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*(?:LLVM_ATTRIBUTE_UNUSED\s*)
              ([a-zA-Z]*)\s*=\s*{};$""",
        declaration, flags=re.X)
    if m:
        name = m.groups()[0]
        add_matcher('*', name, 'Matcher<*>', comment)
        return

    # Parse Variadic functions.
    m = re.match(
        r"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
              ([a-zA-Z]*)\s*=\s*{.*};$""",
        declaration, flags=re.X)
    if m:
        result, arg, name = m.groups()[:3]
        add_matcher(result, name, '%s, ..., %s' % (arg, arg), comment)
        return

    # Parse Variadic operator matchers.
    m = re.match(
        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s>]+)\s*>\s*
              ([a-zA-Z]*)\s*=\s*{.*};$""",
        declaration, flags=re.X)
    if m:
        min_args, max_args, name = m.groups()[:3]
        if max_args == '1':
            add_matcher('*', name, 'Matcher<*>', comment)
            return
        elif max_args == 'UINT_MAX':
            add_matcher('*', name, 'Matcher<*>, ..., Matcher<*>', comment)
            return
        # Any other upper bound falls through to the generic parser below.

    # Parse free standing matcher functions, like:
    #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
    m = re.match(r"""^\s*(.*)\s+
                     ([^\s\(]+)\s*\(
                     (.*)
                     \)\s*{""", declaration, re.X)
    if m:
        result, name, args = m.groups()
        args = ', '.join(p.strip() for p in args.split(','))
        m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result)
        if m:
            result_types = [m.group(2)]
        else:
            result_types = extract_result_types(comment)
        if not result_types:
            if not comment:
                # Only overloads don't have their own doxygen comments;
                # ignore those.
                print('Ignoring "%s"' % name)
            else:
                print('Cannot determine result type for "%s"' % name)
        else:
            for result_type in result_types:
                add_matcher(result_type, name, args, comment)
    else:
        print('*** Unparsable: "' + declaration + '" ***')


def sort_table(matcher_type, matcher_map):
    """Returns the sorted html table for the given row map.

    The rows are emitted in the order of their sorted keys, each followed by
    a newline, and wrapped in the START/END marker comments for matcher_type.
    """
    table = ''.join(matcher_map[key] + '\n' for key in sorted(matcher_map))
    return ('<!-- START_%(type)s_MATCHERS -->\n' +
            '%(table)s' +
            '<!--END_%(type)s_MATCHERS -->') % {
        'type': matcher_type,
        'table': table,
    }


# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
comment = ''
declaration = ''
allowed_types = []
body = False
# Read the header up front so the file handle is closed before parsing.
with open(MATCHERS_FILE) as matchers_file:
    matcher_lines = matchers_file.read().splitlines()
for line in matcher_lines:
    if body:
        if line.strip() and line[0] == '}':
            # A closing brace in column 0 ends the matcher definition.
            if declaration:
                act_on_decl(declaration, comment, allowed_types)
                comment = ''
                declaration = ''
                allowed_types = []
            body = False
        else:
            # Collect the node types named in static asserts of the body.
            m = re.search(r'is_base_of<([^,]+), NodeType>', line)
            if m and m.group(1):
                allowed_types += [m.group(1)]
        continue
    if line.strip() and line.lstrip()[0] == '/':
        # Strip only the leading comment marker; count=1 keeps slashes that
        # are part of the comment text itself (urls, '//' in code samples).
        comment += re.sub(r'/+\s?', '', line, count=1) + '\n'
    else:
        declaration += ' ' + line
        # A declaration is complete on a blank line, a trailing ';', or a
        # trailing '{' that opens a body (but not a '= {' initializer).
        if ((not line.strip()) or
                line.rstrip()[-1] == ';' or
                (line.rstrip()[-1] == '{' and line.rstrip()[-3:] != '= {')):
            if line.strip() and line.rstrip()[-1] == '{':
                body = True
            else:
                act_on_decl(declaration, comment, allowed_types)
                comment = ''
                declaration = ''
                allowed_types = []

node_matcher_table = sort_table('DECL', node_matchers)
narrowing_matcher_table = sort_table('NARROWING', narrowing_matchers)
traversal_matcher_table = sort_table('TRAVERSAL', traversal_matchers)

with open('../LibASTMatchersReference.html') as reference_file:
    reference = reference_file.read()
# The replacements are passed as functions so that backslashes in the
# generated tables are inserted literally instead of being interpreted as
# escape sequences or group references by re.sub.
reference = re.sub(r'<!-- START_DECL_MATCHERS.*END_DECL_MATCHERS -->',
                   lambda _match: node_matcher_table,
                   reference, flags=re.S)
reference = re.sub(r'<!-- START_NARROWING_MATCHERS.*END_NARROWING_MATCHERS -->',
                   lambda _match: narrowing_matcher_table,
                   reference, flags=re.S)
reference = re.sub(r'<!-- START_TRAVERSAL_MATCHERS.*END_TRAVERSAL_MATCHERS -->',
                   lambda _match: traversal_matcher_table,
                   reference, flags=re.S)

# The file is written in binary mode so newlines are not translated. On
# Python 3 the text has to be encoded first; on Python 2 str already is bytes
# and is written unchanged. NOTE(review): UTF-8 is assumed for the reference
# file - confirm if it can contain non-ASCII text in another encoding.
if not isinstance(reference, bytes):
    reference = reference.encode('utf-8')
with open('../LibASTMatchersReference.html', 'wb') as output:
    output.write(reference)