1#!/usr/bin/env python2.7
2
3"""A script to generate FileCheck statements for regression tests.
4
5This script is a utility to update LLVM opt or llc test cases with new
6FileCheck patterns. It can either update all of the tests in the file or
7a single test function.
8
9Example usage:
10$ update_test_checks.py --tool=../bin/opt test/foo.ll
11
12Workflow:
131. Make a compiler patch that requires updating some number of FileCheck lines
14   in regression test files.
152. Save the patch and revert it from your local work area.
163. Update the RUN-lines in the affected regression tests to look canonical.
17   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
184. Refresh the FileCheck lines for either the entire file or select functions by
19   running this script.
205. Commit the fresh baseline of checks.
216. Apply your patch from step 1 and rebuild your local binaries.
227. Re-run this script on affected regression tests.
238. Check the diffs to ensure the script has done something reasonable.
249. Submit a patch including the regression test diffs for review.
25
26A common pattern is to have the script insert complete checking of every
27instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
30"""
31
32import argparse
33import itertools
34import os         # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
# Prefix of the banner line placed at the top of every rewritten test file.
# main() appends 'utils/' and this script's file name to it, and also uses it
# to recognize (and drop) a banner left behind by an earlier run.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# --- Scrubbers: applied to tool output or test lines to normalize them. ---

# Leading whitespace of a single line (no re.M: anchors at string start only).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# A run of spaces/tabs that does not begin at the start of a line.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An x86 instruction line whose trailing '#' comment describes a shuffle
# ("xmm0 = ...", "mem = ..."); group 1 is the mnemonic, group 2 the comment.
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
# A numeric offset from the stack pointer, e.g. "16(%rsp)".
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A RIP-relative memory operand, e.g. "foo(%rip)".
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A constant-pool label, e.g. ".LCPI0_0".
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# A "# kill:" comment line.
# NOTE(review): compiled without re.M, so '^' only anchors at the very start of
# the text it is applied to; kill comments on later lines are not matched.
# Left as-is because changing it would change the generated checks.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# An IR comment together with the whitespace in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# --- Parsers for test files and tool output. ---

# A "; RUN:" line; group 1 is the command text after "RUN:".
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# The "define" line of an IR function; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# One function in llc assembly output; named groups 'func' and 'body'.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# One function in opt IR output; named groups 'func' and 'body'.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
# A FileCheck prefix option; group 1 is the prefix. Both the "--check-prefix="
# and the single-dash "-check-prefix=" spellings are recognized.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix=(\S+)')
# An existing check line; group 1 is the prefix with any -NEXT/-NOT/-DAG/-LABEL
# suffix removed.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# An IR line that defines a value ("  %name = ..."); group 1 is the name.
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')
72
73
# Invoke the tool that is being tested.
def invoke_tool(args, cmd_args, ir):
  """Run the tool under test on a test file and return what it prints.

  args:     parsed command-line options; only args.tool_binary is read.
  cmd_args: option string appended to the binary, separated by one space.
  ir:       path of the file that is fed to the tool on its stdin.

  The combined command line is executed through the shell. Returns the
  captured stdout with every CRLF pair collapsed to a single LF.
  subprocess.check_output raises CalledProcessError on a non-zero exit.
  """
  with open(ir) as stdin_file:
    command_line = args.tool_binary + ' ' + cmd_args
    captured = subprocess.check_output(command_line, shell=True,
                                       stdin=stdin_file)
  # Normalize Windows-style CRLF line endings to plain LF.
  return captured.replace('\r\n', '\n')
82
83
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
  """Return asm with x86-specific details replaced by FileCheck patterns.

  The substitutions are applied one after another, in the order listed, each
  to the result of the previous one.
  """
  substitutions = (
      # Detect shuffle asm comments and hide the operands in favor of the
      # comments.
      (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
      # Generically match the stack offset of a memory operand.
      (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
      # Generically match a RIP-relative memory operand.
      (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
      # Generically match a LCP symbol.
      (SCRUB_X86_LCP_RE, r'{{\.LCPI.*}}'),
      # Strip kill operands inserted into the asm.
      (SCRUB_KILL_COMMENT_RE, ''),
  )
  for pattern, replacement in substitutions:
    asm = pattern.sub(replacement, asm)
  return asm
97
98
def scrub_body(body, tool_basename):
  """Normalize the whitespace of one function body taken from tool output.

  body:          text of the function body.
  tool_basename: name of the tool that produced it; when it is "llc" the
                 result is additionally passed through scrub_asm().

  Returns the scrubbed text.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation. The str method is used instead of
  # the deprecated string.expandtabs() function, which newer Pythons dropped;
  # the result is the same.
  body = body.expandtabs(2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  if tool_basename == "llc":
    body = scrub_asm(body)
  return body
110
111
# Build up a dictionary of all the function bodies.
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
  """Record the scrubbed body of every function found in one tool run.

  raw_tool_output: complete text printed by the tool.
  prefixes:        FileCheck prefixes that apply to this run.
  func_dict:       {prefix: {function name: body}}; updated in place. Every
                   prefix in `prefixes` must already have an entry.
  verbose:         when true, each function and its scrubbed body are echoed
                   to stderr.
  tool_basename:   "llc" selects the assembly function regex; any other value
                   selects the IR one.

  When a function already has a different body recorded under a prefix, the
  entry is set to None (the prefix cannot describe both runs), unless the
  prefix is the last one of this run: then a warning is printed and the new
  body replaces the old one.
  """
  func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE
  # finditer() only yields successful matches, so no per-match check is needed.
  for m in func_regex.finditer(raw_tool_output):
    func = m.group('func')
    scrubbed_body = scrub_body(m.group('body'), tool_basename)
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      sys.stderr.write('Processing function: ' + func + '\n')
      for l in scrubbed_body.splitlines():
        sys.stderr.write('  ' + l + '\n')
    for prefix in prefixes:
      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
        if prefix == prefixes[-1]:
          sys.stderr.write(('WARNING: Found conflicting asm under the '
                            'same prefix: %r!' % (prefix,)) + '\n')
        else:
          # Conflicting bodies under a shared prefix: mark it unusable.
          func_dict[prefix][func] = None
          continue

      func_dict[prefix][func] = scrubbed_body
140
141
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return an upper-case FileCheck variable name derived from an IR name.

  var: the IR value name without its leading '%'.

  A purely numeric name gets a 'TMP' prefix. Every character other than an
  ASCII letter, digit or underscore (for example '.', '-' or '$', which may
  appear in IR names) is replaced by '_', because FileCheck variable names
  may only contain letters, digits and underscores.
  """
  if var.isdigit():
    var = 'TMP' + var
  var = re.sub(r'[^A-Za-z0-9_]', '_', var)
  return var.upper()
148
149
# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck text that defines the variable for IR name var.

  The result has the form '[[NAME:%.*]]', where NAME comes from
  get_value_name(var).
  """
  pieces = ['[[', get_value_name(var), ':%.*]]']
  return ''.join(pieces)
153
154
# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck text that refers to the variable for IR name var.

  The result has the form '[[NAME]]', where NAME comes from
  get_value_name(var).
  """
  pieces = ['[[', get_value_name(var), ']]']
  return ''.join(pieces)
158
159
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines):
  """Return lines with IR value names replaced by FileCheck variables.

  lines: the lines of one IR function body.

  A line that IR_VALUE_DEF_RE recognizes as a definition has the defined
  name turned into a FileCheck variable definition. Afterwards every
  remaining '%name' of a defined value, on any line, is turned into a use
  of that variable. Names that are never defined in `lines` are left alone.
  """
  lines_with_def = []
  vars_seen = []
  for line in lines:
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    m = IR_VALUE_DEF_RE.match(line)
    if m:
      name = m.group(1)
      vars_seen.append(name)
      # The definition is the first '%name' on the line. Replace only that
      # occurrence: replacing all of them would also rewrite the front of a
      # longer name sharing this prefix (e.g. '%ab' while defining '%a') and
      # would emit a second definition for a self-reference.
      line = line.replace('%' + name, get_value_definition(name), 1)

    lines_with_def.append(line)

  # A single def isn't worth replacing?
  #if len(vars_seen) < 2:
  #  return lines

  vars_seen.sort(key=len, reverse=True)
  # A use must be the whole name: the lookahead rejects a match followed by
  # another identifier character, so '%a' is not replaced inside '%a.b'.
  use_patterns = [
      (re.compile(re.escape('%' + var) + r'(?![-\w.$])'), get_value_use(var))
      for var in vars_seen]

  output_lines = []
  for line in lines_with_def:
    for pattern, use in use_patterns:
      # A callable replacement inserts the text literally (no escape parsing).
      line = pattern.sub(lambda _m, use=use: use, line)
    output_lines.append(line)

  return output_lines
186
187
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
  """Append the generated check lines for one function to output_lines.

  output_lines:  list of output lines; appended to in place and returned.
  prefix_list:   list of (check prefixes, tool args) pairs, one per RUN line.
  func_dict:     {prefix: {function name: body text or None}}.
  func_name:     name of the function being annotated.
  tool_basename: "llc" produces assembly-style labels; "opt" produces IR-style
                 labels and additionally genericizes value names and strips
                 IR comments.

  For every RUN line, the first prefix that has a non-empty body for
  func_name is used, unless a prefix that was already printed is reached
  first. A prefix or function that is missing from func_dict is treated like
  an empty body (no checks are emitted) instead of raising KeyError.
  """
  is_opt = tool_basename == "opt"
  # Select a label format based on the whether we're checking asm or IR.
  if tool_basename == "llc":
    check_label_format = "; %s-LABEL: %s:"
  else:
    check_label_format = "; %s-LABEL: @%s("

  printed_prefixes = []
  for checkprefixes, _ in prefix_list:
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break
      # The tool output may not contain this function at all (for example
      # when it was removed or not recognized), so look it up defensively.
      body_text = func_dict.get(checkprefix, {}).get(func_name)
      if not body_text:
        continue
      printed_prefixes.append(checkprefix)
      output_lines.append(check_label_format % (checkprefix, func_name))
      func_body = body_text.splitlines()

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      if is_opt:
        func_body = genericize_check_lines(func_body)

      # Every line of the body is checked, including a possibly noisy first
      # line: better to check everything. Be safe rather than sorry.

      # For llc tests, there may be asm directives between the label and the
      # first checked line (most likely that first checked line is "# BB#0"),
      # so the first check must not be a -NEXT check there.
      is_blank_line = not is_opt

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        if is_opt:
          func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the line that follows a
        # blank one gets a plain check rather than a -NEXT check.
        if is_blank_line:
          output_lines.append('; %s:       %s' % (checkprefix, func_line))
        else:
          output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(';')
      break
  return output_lines
253
254
def should_add_line_to_output(input_line, prefix_set):
  """Decide whether a line inside a test function is copied to the output.

  input_line: one line of the test file.
  prefix_set: the FileCheck prefixes this script is regenerating.

  Returns False for an empty comment line (just ';') and for an existing
  check line whose prefix is in prefix_set, since fresh checks are generated
  for those. Returns True for everything else, blank lines included.
  """
  # Skip any blank comment lines in the IR.
  if input_line.strip() == ';':
    return False
  # And skip any CHECK lines. We're building our own.
  check_match = CHECK_RE.match(input_line)
  is_own_check = bool(check_match) and check_match.group(1) in prefix_set
  return not is_own_check
268
269
def main():
  """Command-line entry point: regenerate the check lines of each test file.

  For every test file named on the command line:
    1. collect its RUN lines and keep those of the form
       "<tool> ... | FileCheck ...", where <tool> is the base name of
       --tool-binary (which must be "llc" or "opt");
    2. run the tool once per kept RUN line and record the function bodies;
    3. rewrite the file in place with the banner line, the surrounding text,
       and freshly generated check lines after each function's "define" line
       (only for --function when that option is given).

  RUN lines that do not match step 1, including lines without any '|', are
  skipped with a warning on stderr. Exits with status 1 if the tool name is
  not supported.
  """
  from argparse import RawTextHelpFormatter
  parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--tool-binary', default='llc',
                      help='The tool used to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

  tool_basename = os.path.basename(args.tool_binary)
  if tool_basename not in ("llc", "opt"):
    sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
    sys.exit(1)

  for test in args.tests:
    if args.verbose:
      sys.stderr.write('Scanning for RUN lines in test file: %s\n' % (test,))
    with open(test) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = [m.group(1)
                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
    if args.verbose:
      sys.stderr.write('Found %d RUN lines:\n' % (len(run_lines),))
      for l in run_lines:
        sys.stderr.write('  RUN: ' + l + '\n')

    prefix_list = []
    for l in run_lines:
      # Split at the first '|'. partition() yields an empty FileCheck part
      # when the RUN line has no pipe at all, so such a line is reported and
      # skipped below instead of crashing the tuple unpacking.
      tool_part, _, filecheck_part = l.partition('|')
      tool_cmd = tool_part.strip()
      filecheck_cmd = filecheck_part.strip()

      if not tool_cmd.startswith(tool_basename + ' '):
        sys.stderr.write('WARNING: Skipping non-%s RUN line: %s\n' % (tool_basename, l))
        continue

      if not filecheck_cmd.startswith('FileCheck '):
        sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
        continue

      # Drop the tool name and the input-file placeholder; the test file is
      # fed to the tool on stdin instead.
      tool_cmd_args = tool_cmd[len(tool_basename):].strip()
      tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

      check_prefixes = [m.group(1)
                        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
      if not check_prefixes:
        check_prefixes = ['CHECK']

      # FIXME: We should use multiple check prefixes to common check lines. For
      # now, we just ignore all but the last.
      prefix_list.append((check_prefixes, tool_cmd_args))

    func_dict = {}
    for prefixes, _ in prefix_list:
      for prefix in prefixes:
        func_dict[prefix] = {}
    for prefixes, tool_args in prefix_list:
      if args.verbose:
        sys.stderr.write('Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
        sys.stderr.write('Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

      raw_tool_output = invoke_tool(args, tool_args, test)
      build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)

    # State of the rewrite loop below:
    #   is_in_function:       inside the body of a function being updated.
    #   is_in_function_start: just after its "define" line, before the
    #                         generated checks have been emitted.
    is_in_function = False
    is_in_function_start = False
    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
    if args.verbose:
      sys.stderr.write('Rewriting FileCheck prefixes: %s\n' % (prefix_set,))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
      if is_in_function_start:
        # Drop blank lines and keep unrelated comments that directly follow
        # the "define" line; the generated checks go after them.
        if input_line == '':
          continue
        if input_line.lstrip().startswith(';'):
          m = CHECK_RE.match(input_line)
          if not m or m.group(1) not in prefix_set:
            output_lines.append(input_line)
            continue

        # Print out the various check lines here.
        output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
        is_in_function_start = False

      if is_in_function:
        if not should_add_line_to_output(input_line, prefix_set):
          continue
        # This input line of the function body will go as-is into the output.
        # Except make leading whitespace uniform: 2 spaces.
        input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
        output_lines.append(input_line)
        if input_line.strip() == '}':
          is_in_function = False
        continue

      # Discard any previous script advertising.
      if input_line.startswith(ADVERT):
        continue

      # If it's outside a function, it just gets copied to the output.
      output_lines.append(input_line)

      m = IR_FUNCTION_RE.match(input_line)
      if not m:
        continue
      name = m.group(1)
      if args.function is not None and name != args.function:
        # When filtering on a specific function, skip all others.
        continue
      is_in_function = is_in_function_start = True

    if args.verbose:
      sys.stderr.write('Writing %d lines to %s...\n' % (len(output_lines), test))

    with open(test, 'wb') as f:
      f.writelines([l + '\n' for l in output_lines])
393
394
# Run the updater only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
397
398