1#!/usr/bin/env python2.7 2# 3# Copyright 2017 Google Inc. 4# 5# Use of this source code is governed by a BSD-style license that can be 6# found in the LICENSE file. 7 8import re 9import subprocess 10import sys 11 12clang = 'clang-5.0' 13objdump = 'gobjdump' 14ccache = 'ccache' 15stages = 'src/jumper/SkJumper_stages.cpp' 16stages_lowp = 'src/jumper/SkJumper_stages_lowp.cpp' 17generated = 'src/jumper/SkJumper_generated.S' 18generated_win = 'src/jumper/SkJumper_generated_win.S' 19 20clang = sys.argv[1] if len(sys.argv) > 1 else clang 21objdump = sys.argv[2] if len(sys.argv) > 2 else objdump 22ccache = sys.argv[3] if len(sys.argv) > 3 else ccache 23stages = sys.argv[4] if len(sys.argv) > 4 else stages 24stages_lowp = sys.argv[5] if len(sys.argv) > 5 else stages_lowp 25generated = sys.argv[6] if len(sys.argv) > 6 else generated 26generated_win = sys.argv[7] if len(sys.argv) > 7 else generated_win 27 28clang = [ccache, clang, '-x', 'c++'] 29 30 31cflags = ['-std=c++11', '-Os', '-DJUMPER_IS_OFFLINE', 32 '-momit-leaf-frame-pointer', '-ffp-contract=fast', 33 '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables'] 34 35x86 = [ '-m32' ] 36win = ['-DWIN', '-mno-red-zone'] 37sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1'] 38subprocess.check_call(clang + cflags + sse2 + 39 ['-c', stages] + 40 ['-o', 'sse2.o']) 41subprocess.check_call(clang + cflags + sse2 + win + 42 ['-c', stages] + 43 ['-o', 'win_sse2.o']) 44subprocess.check_call(clang + cflags + sse2 + x86 + 45 ['-c', stages] + 46 ['-o', 'x86_sse2.o']) 47subprocess.check_call(clang + cflags + sse2 + win + x86 + 48 ['-c', stages] + 49 ['-o', 'win_x86_sse2.o']) 50 51subprocess.check_call(clang + cflags + sse2 + 52 ['-c', stages_lowp] + 53 ['-o', 'lowp_sse2.o']) 54subprocess.check_call(clang + cflags + sse2 + win + 55 ['-c', stages_lowp] + 56 ['-o', 'win_lowp_sse2.o']) 57subprocess.check_call(clang + cflags + sse2 + x86 + 58 ['-c', stages_lowp] + 59 ['-o', 'x86_lowp_sse2.o']) 60subprocess.check_call(clang + cflags + sse2 + win + x86 + 61 ['-c', stages_lowp] + 62 ['-o', 'win_x86_lowp_sse2.o']) 63 64sse41 = ['-msse4.1'] 65subprocess.check_call(clang + cflags + sse41 + 66 ['-c', stages] + 67 ['-o', 'sse41.o']) 68subprocess.check_call(clang + cflags + sse41 + win + 69 ['-c', stages] + 70 ['-o', 'win_sse41.o']) 71 72subprocess.check_call(clang + cflags + sse41 + 73 ['-c', stages_lowp] + 74 ['-o', 'lowp_sse41.o']) 75subprocess.check_call(clang + cflags + sse41 + win + 76 ['-c', stages_lowp] + 77 ['-o', 'win_lowp_sse41.o']) 78 79avx = ['-mavx'] 80subprocess.check_call(clang + cflags + avx + 81 ['-c', stages] + 82 ['-o', 'avx.o']) 83subprocess.check_call(clang + cflags + avx + win + 84 ['-c', stages] + 85 ['-o', 'win_avx.o']) 86 87hsw = ['-mavx2', '-mfma', '-mf16c'] 88subprocess.check_call(clang + cflags + hsw + 89 ['-c', stages] + 90 ['-o', 'hsw.o']) 91subprocess.check_call(clang + cflags + hsw + win + 92 ['-c', stages] + 93 ['-o', 'win_hsw.o']) 94 95subprocess.check_call(clang + cflags + hsw + 96 ['-c', stages_lowp] + 97 ['-o', 'lowp_hsw.o']) 98subprocess.check_call(clang + cflags + hsw + win + 99 ['-c', stages_lowp] + 100 ['-o', 'win_lowp_hsw.o']) 101 102skx = ['-march=skylake-avx512'] 103subprocess.check_call(clang + cflags + skx + 104 ['-c', stages] + 105 ['-o', 'skx.o']) 106 107# Merge x86-64 object files to deduplicate constants. 108# (No other platform has more than one specialization.) 109subprocess.check_call(['ld', '-r', '-o', 'merged.o', 110 'skx.o', 'hsw.o', 'avx.o', 'sse41.o', 'sse2.o', 111 'lowp_hsw.o', 'lowp_sse41.o', 'lowp_sse2.o']) 112subprocess.check_call(['ld', '-r', '-o', 'win_merged.o', 113 'win_hsw.o', 'win_avx.o', 'win_sse41.o', 'win_sse2.o', 114 'win_lowp_hsw.o', 'win_lowp_sse41.o', 'win_lowp_sse2.o']) 115 116subprocess.check_call(['ld', '-r', '-o', 'x86_merged.o', 117 'x86_sse2.o', 118 'x86_lowp_sse2.o']) 119subprocess.check_call(['ld', '-r', '-o', 'win_x86_merged.o', 120 'win_x86_sse2.o', 121 'win_x86_lowp_sse2.o']) 122 123def parse_object_file(dot_o, directive, target=None): 124 globl, hidden, label, comment, align = \ 125 '.globl', 'HIDDEN', ':', '// ', 'BALIGN' 126 if 'win' in dot_o: 127 globl, hidden, label, comment, align = \ 128 'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN ' 129 130 cmd = [objdump] 131 if target: 132 cmd += ['--target', target] 133 134 # Look for sections we know we can't handle. 135 section_headers = subprocess.check_output(cmd + ['-h', dot_o]) 136 for snippet in ['.rodata']: 137 if snippet in section_headers: 138 print >>sys.stderr, 'Found %s in section.' % snippet 139 assert snippet not in section_headers 140 141 if directive == '.long': 142 disassemble = ['-d', dot_o] 143 dehex = lambda h: '0x'+h 144 else: 145 # x86-64... as long as we're using %rip-relative addressing, 146 # literal sections should be fine to just dump in with .text. 147 disassemble = ['-d', # DO NOT USE -D. 148 '-z', # Print zero bytes instead of ... 149 '--insn-width=11', 150 '-j', '.text', 151 '-j', '.literal4', 152 '-j', '.literal8', 153 '-j', '.literal16', 154 '-j', '.const', 155 dot_o] 156 dehex = lambda h: str(int(h,16)) 157 158 # Ok. Let's disassemble. 159 for line in subprocess.check_output(cmd + disassemble).split('\n'): 160 line = line.strip() 161 162 if not line or line.startswith(dot_o) or line.startswith('Disassembly'): 163 continue 164 165 # E.g. 00000000000003a4 <_load_f16>: 166 m = re.match('''[0-9a-f]+ <_?(.*)>:''', line) 167 if m: 168 print 169 sym = m.group(1) 170 if sym.startswith('.literal'): # .literal4, .literal16, etc 171 print sym.replace('.literal', align) 172 elif sym.startswith('.const'): # 32-byte constants 173 print align + '32' 174 elif not sym.startswith('sk_'): 175 print >>sys.stderr, "build_stages.py can't handle '%s' (yet?)." % sym 176 assert sym.startswith('sk_') 177 else: # a stage function 178 if hidden: 179 print hidden + ' _' + sym 180 print globl + ' _' + sym 181 if 'win' not in dot_o: 182 print 'FUNCTION(_' + sym + ')' 183 print '_' + sym + label 184 continue 185 186 columns = line.split('\t') 187 #print >>sys.stderr, columns 188 code = columns[1] 189 if len(columns) >= 4: 190 inst = columns[2] 191 args = columns[3] 192 else: 193 inst, args = columns[2], '' 194 if ' ' in columns[2]: 195 inst, args = columns[2].split(' ', 1) 196 code, inst, args = code.strip(), inst.strip(), args.strip() 197 198 hexed = ','.join(dehex(x) for x in code.split(' ')) 199 print ' ' + directive + ' ' + hexed + ' '*(36-len(hexed)) + \ 200 comment + inst + (' '*(14-len(inst)) + args if args else '') 201 202sys.stdout = open(generated, 'w') 203 204print '''# Copyright 2017 Google Inc. 205# 206# Use of this source code is governed by a BSD-style license that can be 207# found in the LICENSE file. 208 209# This file is generated semi-automatically with this command: 210# $ src/jumper/build_stages.py 211''' 212print '#if defined(__MACH__)' 213print ' #define HIDDEN .private_extern' 214print ' #define FUNCTION(name)' 215print ' #define BALIGN4 .align 2' 216print ' #define BALIGN8 .align 3' 217print ' #define BALIGN16 .align 4' 218print ' #define BALIGN32 .align 5' 219print '#else' 220print ' .section .note.GNU-stack,"",%progbits' 221print ' #define HIDDEN .hidden' 222print ' #define FUNCTION(name) .type name,%function' 223print ' #define BALIGN4 .balign 4' 224print ' #define BALIGN8 .balign 8' 225print ' #define BALIGN16 .balign 16' 226print ' #define BALIGN32 .balign 32' 227print '#endif' 228 229print '.text' 230print '#if defined(__x86_64__)' 231print 'BALIGN32' 232parse_object_file('merged.o', '.byte') 233 234print '#elif defined(__i386__)' 235print 'BALIGN32' 236parse_object_file('x86_merged.o', '.byte') 237 238print '#endif' 239 240sys.stdout = open(generated_win, 'w') 241print '''; Copyright 2017 Google Inc. 242; 243; Use of this source code is governed by a BSD-style license that can be 244; found in the LICENSE file. 245 246; This file is generated semi-automatically with this command: 247; $ src/jumper/build_stages.py 248''' 249print 'IFDEF RAX' 250print "_text32 SEGMENT ALIGN(32) 'CODE'" 251print 'ALIGN 32' 252parse_object_file('win_merged.o', 'DB') 253 254print 'ELSE' 255print '.MODEL FLAT,C' 256print "_text32 SEGMENT ALIGN(32) 'CODE'" 257print 'ALIGN 32' 258parse_object_file('win_x86_merged.o', 'DB') 259 260print 'ENDIF' 261print 'END' 262