1ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines#!/usr/bin/env python2.7
2ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
"""A test case update script.

This script is a utility to update LLVM X86 'llc' based test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.
"""
9ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
10ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesimport argparse
11ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesimport itertools
12ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesimport string
13ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesimport subprocess
14ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesimport sys
15ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesimport tempfile
16ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesimport re
17ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
18ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
def llc(args, cmd_args, ir):
  """Run llc over an IR file and return everything it wrote to stdout.

  The command line is `args.llc_binary`, a single space, then `cmd_args`; it
  is handed to the shell as one string, and the file named by `ir` is
  attached as the process's standard input.

  Args:
    args: Parsed command-line options; only `llc_binary` is read.
    cmd_args: Extra llc arguments as a single string.
    ir: Path of the IR file to feed to llc on stdin.

  Returns:
    The captured standard output of the command.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero
      status.
  """
  with open(ir) as ir_input:
    command = ' '.join((args.llc_binary, cmd_args))
    return subprocess.check_output(command, shell=True, stdin=ir_input)
24ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
25ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
# Runs of spaces/tabs anywhere except at the very start of a line. The first
# whitespace character of a line is never the start of a match, so the
# indentation marker survives.
ASM_SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs immediately before the end of any line.
ASM_SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# An instruction whose trailing '#' comment describes a vector shuffle
# ("xmmN = ..." / "mem = ..."). Group 1 is the indented mnemonic, group 2 the
# comment text.
ASM_SCRUB_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$',
        flags=re.M))
# A numeric displacement off the stack pointer, e.g. "8(%rsp)".
ASM_SCRUB_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
# A symbolic RIP-relative operand, e.g. ".LCPI0_0(%rip)".
ASM_SCRUB_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
# A whole "# kill: ..." comment line including its newline. re.M is required:
# without it '^' only anchors at the start of the entire body, and kill
# comments in the middle of a function were never removed.
ASM_SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)


def scrub_asm(asm):
  """Normalize a function's assembly text for use in FileCheck lines.

  The steps run in a fixed order because later patterns expect the
  single-space, two-column-indent layout produced by the earlier ones.

  Args:
    asm: Assembly text of one function body.

  Returns:
    The text with inner whitespace collapsed, tabs expanded to two columns,
    shuffle operands / stack offsets / RIP-relative symbols replaced by
    FileCheck regex blocks, "# kill:" comment lines dropped and trailing
    whitespace removed.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  asm = ASM_SCRUB_WHITESPACE_RE.sub(r' ', asm)
  # Expand the tabs used for indentation. The str method is used rather than
  # string.expandtabs, which does not exist on Python 3.
  asm = asm.expandtabs(2)
  # Detect shuffle asm comments and hide the operands in favor of the comments.
  asm = ASM_SCRUB_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
  # Generically match the stack offset of a memory operand.
  asm = ASM_SCRUB_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
  # Generically match a RIP-relative memory operand.
  asm = ASM_SCRUB_RIP_RE.sub(r'{{.*}}(%rip)', asm)
  # Strip kill operands inserted into the asm.
  asm = ASM_SCRUB_KILL_COMMENT_RE.sub('', asm)
  # Strip trailing whitespace.
  asm = ASM_SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
  return asm
54ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
55ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
# A lit RUN line inside an IR comment; group 1 is the command text.
_RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# The first line of an IR function definition; group 1 is the function name.
_IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
# One function in llc's output: a "name:  # @name" label, then a body that
# starts at the first "# ...:" / "## ...:" comment label and stops before a
# ".size"-preceded label, .cfi_endproc, .globl, .comm or a (sub)section
# directive.
_ASM_FUNCTION_RE = re.compile(
    r'^_?(?P<f>[^:]+):[ \t]*#+[ \t]*@(?P=f)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# A "--check-prefix=NAME" option on a FileCheck command line.
_CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# An existing check line; group 1 is the prefix without any
# -NEXT/-NOT/-DAG/-LABEL suffix.
_CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')


def _log(message):
  """Write one line of diagnostic text to stderr."""
  sys.stderr.write(message + '\n')


def _parse_run_lines(run_lines):
  """Turn RUN commands into a list of (check_prefixes, llc_args) pairs.

  RUN lines that are not of the form "llc ... | FileCheck ..." are skipped
  with a warning. A RUN line with no pipe at all is treated as having an
  empty FileCheck command, so it is skipped rather than raising.
  """
  checks = []
  for run_line in run_lines:
    commands = [cmd.strip() for cmd in run_line.split('|', 1)]
    llc_cmd = commands[0]
    filecheck_cmd = commands[1] if len(commands) > 1 else ''

    if not llc_cmd.startswith('llc '):
      _log('WARNING: Skipping non-llc RUN line: ' + run_line)
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _log('WARNING: Skipping non-FileChecked RUN line: ' + run_line)
      continue

    # Drop the tool name and the lit "%s" input substitution; the test file
    # is supplied on stdin instead.
    llc_args = llc_cmd[len('llc'):].strip()
    llc_args = llc_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = _CHECK_PREFIX_RE.findall(filecheck_cmd) or ['CHECK']

    # FIXME: We should use multiple check prefixes to common check lines.
    checks.append((check_prefixes, llc_args))
  return checks


def _collect_function_asm(args, checks, test):
  """Run llc once per check entry and map prefix -> function -> scrubbed asm.

  When a later run produces different asm for a function under a prefix that
  is not the last prefix of that run, the entry is set to None so no checks
  are emitted for it. A conflict under the last prefix only warns and the
  newer asm wins.
  """
  asm = {}
  for prefixes, _ in checks:
    for prefix in prefixes:
      asm[prefix] = {}

  for prefixes, llc_args in checks:
    if args.verbose:
      _log('Extracted LLC cmd: llc ' + llc_args)
      _log('Extracted FileCheck prefixes: ' + str(prefixes))
    raw_asm = llc(args, llc_args, test)
    if not isinstance(raw_asm, str):
      # On Python 3 the subprocess output is bytes; the patterns need text.
      raw_asm = raw_asm.decode('utf-8')

    # Build up a dictionary of all the function bodies.
    for m in _ASM_FUNCTION_RE.finditer(raw_asm):
      f = m.group('f')
      f_asm = scrub_asm(m.group('body'))
      if f.startswith('stress'):
        # We only use the last line of the asm for stress tests.
        f_asm = '\n'.join(f_asm.splitlines()[-1:])
      if args.verbose:
        _log('Processing asm for function: ' + f)
        for asm_line in f_asm.splitlines():
          _log('  ' + asm_line)
      for prefix in prefixes:
        if f in asm[prefix] and asm[prefix][f] != f_asm:
          if prefix == prefixes[-1]:
            _log('WARNING: Found conflicting asm under the same prefix!')
          else:
            asm[prefix][f] = None
            continue

        asm[prefix][f] = f_asm
  return asm


def _build_check_lines(checks, asm, name):
  """Return the generated check lines for IR function `name`.

  For each check entry, the first prefix that has asm for the function and
  has not been printed yet contributes a -LABEL line, a plain check line and
  -NEXT lines. Groups after the first are separated by a bare ';' line.
  """
  lines = []
  printed_prefixes = []
  for prefixes, _ in checks:
    for prefix in prefixes:
      if prefix in printed_prefixes:
        break
      # .get(): a run may not have produced this function at all.
      if not asm[prefix].get(name):
        continue
      if printed_prefixes:
        lines.append(';')
      printed_prefixes.append(prefix)
      lines.append('; %s-LABEL: %s:' % (prefix, name))
      asm_lines = asm[prefix][name].splitlines()
      lines.append('; %s:       %s' % (prefix, asm_lines[0]))
      for asm_line in asm_lines[1:]:
        lines.append('; %s-NEXT:  %s' % (prefix, asm_line))
      break
  return lines


def _rewrite_test_lines(test_lines, checks, asm, prefix_set, function_filter):
  """Return the test's lines with regenerated check lines per function.

  Inside each selected function, existing check lines using a prefix from
  `prefix_set` and bare ';' lines are dropped; fresh check lines are inserted
  after any leading non-check comment lines of the body. A function for which
  no run produced asm is left untouched (with a warning) so that its existing
  checks are not lost.
  """
  is_in_function = False
  is_in_function_start = False
  name = None
  fixed_lines = []
  for l in test_lines:
    if is_in_function_start:
      if l.lstrip().startswith(';'):
        m = _CHECK_RE.match(l)
        if not m or m.group(1) not in prefix_set:
          # Keep leading comments that are not our check lines.
          fixed_lines.append(l)
          continue

      # Print out the various check lines here.
      fixed_lines.extend(_build_check_lines(checks, asm, name))
      is_in_function_start = False

    if is_in_function:
      # Skip any blank comment lines in the IR.
      if l.strip() == ';':
        continue
      # And skip any CHECK lines. We'll build our own.
      m = _CHECK_RE.match(l)
      if m and m.group(1) in prefix_set:
        continue
      # Collect the remaining lines in the function body and look for the end
      # of the function.
      fixed_lines.append(l)
      if l.strip() == '}':
        is_in_function = False
      continue

    fixed_lines.append(l)

    m = _IR_FUNCTION_RE.match(l)
    if not m:
      continue
    name = m.group(1)
    if function_filter is not None and name != function_filter:
      # When filtering on a specific function, skip all others.
      continue
    if prefix_set and not any(asm[p].get(name) for p in prefix_set):
      _log('WARNING: No asm found for function %s; leaving its checks '
           'untouched' % (name,))
      continue
    is_in_function = is_in_function_start = True
  return fixed_lines


def main():
  """Regenerate the FileCheck lines of every test file named on the command line.

  For each test: read its RUN lines, run llc for every "llc | FileCheck" RUN
  line, and overwrite the test file with check lines built from the scrubbed
  assembly. Options are read from sys.argv via argparse.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--llc-binary', default='llc',
                      help='The "llc" binary to use to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  for test in args.tests:
    if args.verbose:
      _log('Scanning for RUN lines in test file: %s' % (test,))
    with open(test) as f:
      test_lines = [l.rstrip() for l in f]

    run_matches = [_RUN_LINE_RE.match(l) for l in test_lines]
    run_lines = [m.group(1) for m in run_matches if m]
    if args.verbose:
      _log('Found %d RUN lines:' % (len(run_lines),))
      for l in run_lines:
        _log('  RUN: ' + l)

    checks = _parse_run_lines(run_lines)
    asm = _collect_function_asm(args, checks, test)

    prefix_set = set(prefix for prefixes, _ in checks for prefix in prefixes)
    if args.verbose:
      _log('Rewriting FileCheck prefixes: %s' % (prefix_set,))
    fixed_lines = _rewrite_test_lines(
        test_lines, checks, asm, prefix_set, args.function)

    if args.verbose:
      _log('Writing %d fixed lines to %s...' % (len(fixed_lines), test))
    with open(test, 'w') as f:
      f.writelines([l + '\n' for l in fixed_lines])
210ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
211ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
  main()
214