1#!/usr/bin/env python2.7
2#
3# Copyright 2017 Google Inc.
4#
5# Use of this source code is governed by a BSD-style license that can be
6# found in the LICENSE file.
7
8import re
9import subprocess
10import sys
11
# Built-in defaults for the tools and files this script uses, listed in the
# same order as the optional positional command-line arguments.
_defaults = [
  'clang-5.0',                            # clang
  'gobjdump',                             # objdump
  'ccache',                               # ccache
  'src/jumper/SkJumper_stages.cpp',       # stages
  'src/jumper/SkJumper_stages_lowp.cpp',  # stages_lowp
  'src/jumper/SkJumper_generated.S',      # generated
  'src/jumper/SkJumper_generated_win.S',  # generated_win
]

# An argument given on the command line replaces the default in the same
# position; positions with no argument keep their default.  Arguments beyond
# the seventh are ignored.
_given = sys.argv[1:1 + len(_defaults)]
(clang, objdump, ccache, stages, stages_lowp,
 generated, generated_win) = _given + _defaults[len(_given):]

# From here on `clang` is the whole command prefix: ccache in front of the
# compiler, with '-x c++' so the inputs are compiled as C++.
clang = [ccache, clang, '-x', 'c++']
29
30
# Flags passed to every compile.
cflags = ['-std=c++11', '-Os', '-DJUMPER_IS_OFFLINE',
          '-momit-leaf-frame-pointer', '-ffp-contract=fast',
          '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']

# Extra flags for the 32-bit (-m32) and Windows (-DWIN) variants.
x86 = ['-m32']
win = ['-DWIN', '-mno-red-zone']

# One flag group per instruction-set specialization.
sse2  = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
sse41 = ['-msse4.1']
avx   = ['-mavx']
hsw   = ['-mavx2', '-mfma', '-mf16c']
skx   = ['-march=skylake-avx512']

# Every object file to build, in the order the compiles are run:
#   (flags added after cflags, source file, object file to write)
_compile_jobs = [
  (sse2,              stages,      'sse2.o'),
  (sse2 + win,        stages,      'win_sse2.o'),
  (sse2 + x86,        stages,      'x86_sse2.o'),
  (sse2 + win + x86,  stages,      'win_x86_sse2.o'),

  (sse2,              stages_lowp, 'lowp_sse2.o'),
  (sse2 + win,        stages_lowp, 'win_lowp_sse2.o'),
  (sse2 + x86,        stages_lowp, 'x86_lowp_sse2.o'),
  (sse2 + win + x86,  stages_lowp, 'win_x86_lowp_sse2.o'),

  (sse41,             stages,      'sse41.o'),
  (sse41 + win,       stages,      'win_sse41.o'),

  (sse41,             stages_lowp, 'lowp_sse41.o'),
  (sse41 + win,       stages_lowp, 'win_lowp_sse41.o'),

  (avx,               stages,      'avx.o'),
  (avx + win,         stages,      'win_avx.o'),

  (hsw,               stages,      'hsw.o'),
  (hsw + win,         stages,      'win_hsw.o'),

  (hsw,               stages_lowp, 'lowp_hsw.o'),
  (hsw + win,         stages_lowp, 'win_lowp_hsw.o'),

  (skx,               stages,      'skx.o'),
]
for _extra, _source, _object in _compile_jobs:
  subprocess.check_call(clang + cflags + _extra +
                        ['-c', _source, '-o', _object])

# Merge the object files of each target with `ld -r` so constants shared
# between specializations are deduplicated.
#   (merged object file to write, object files to combine)
_link_jobs = [
  ('merged.o',         ['skx.o', 'hsw.o', 'avx.o', 'sse41.o', 'sse2.o',
                        'lowp_hsw.o', 'lowp_sse41.o', 'lowp_sse2.o']),
  ('win_merged.o',     ['win_hsw.o', 'win_avx.o', 'win_sse41.o', 'win_sse2.o',
                        'win_lowp_hsw.o', 'win_lowp_sse41.o',
                        'win_lowp_sse2.o']),
  ('x86_merged.o',     ['x86_sse2.o',
                        'x86_lowp_sse2.o']),
  ('win_x86_merged.o', ['win_x86_sse2.o',
                        'win_x86_lowp_sse2.o']),
]
for _merged, _parts in _link_jobs:
  subprocess.check_call(['ld', '-r', '-o', _merged] + _parts)
122
123def parse_object_file(dot_o, directive, target=None):
124  globl, hidden, label, comment, align = \
125      '.globl', 'HIDDEN', ':', '// ', 'BALIGN'
126  if 'win' in dot_o:
127    globl, hidden, label, comment, align = \
128        'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '
129
130  cmd = [objdump]
131  if target:
132    cmd += ['--target', target]
133
134  # Look for sections we know we can't handle.
135  section_headers = subprocess.check_output(cmd + ['-h', dot_o])
136  for snippet in ['.rodata']:
137    if snippet in section_headers:
138      print >>sys.stderr, 'Found %s in section.' % snippet
139      assert snippet not in section_headers
140
141  if directive == '.long':
142    disassemble = ['-d', dot_o]
143    dehex = lambda h: '0x'+h
144  else:
145    # x86-64... as long as we're using %rip-relative addressing,
146    # literal sections should be fine to just dump in with .text.
147    disassemble = ['-d',               # DO NOT USE -D.
148                   '-z',               # Print zero bytes instead of ...
149                   '--insn-width=11',
150                   '-j', '.text',
151                   '-j', '.literal4',
152                   '-j', '.literal8',
153                   '-j', '.literal16',
154                   '-j', '.const',
155                   dot_o]
156    dehex = lambda h: str(int(h,16))
157
158  # Ok.  Let's disassemble.
159  for line in subprocess.check_output(cmd + disassemble).split('\n'):
160    line = line.strip()
161
162    if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
163      continue
164
165    # E.g. 00000000000003a4 <_load_f16>:
166    m = re.match('''[0-9a-f]+ <_?(.*)>:''', line)
167    if m:
168      print
169      sym = m.group(1)
170      if sym.startswith('.literal'):  # .literal4, .literal16, etc
171        print sym.replace('.literal', align)
172      elif sym.startswith('.const'):  # 32-byte constants
173        print align + '32'
174      elif not sym.startswith('sk_'):
175        print >>sys.stderr, "build_stages.py can't handle '%s' (yet?)." % sym
176        assert sym.startswith('sk_')
177      else:  # a stage function
178        if hidden:
179          print hidden + ' _' + sym
180        print globl + ' _' + sym
181        if 'win' not in dot_o:
182          print 'FUNCTION(_' + sym + ')'
183        print '_' + sym + label
184      continue
185
186    columns = line.split('\t')
187   #print >>sys.stderr, columns
188    code = columns[1]
189    if len(columns) >= 4:
190      inst = columns[2]
191      args = columns[3]
192    else:
193      inst, args = columns[2], ''
194      if ' ' in columns[2]:
195        inst, args = columns[2].split(' ', 1)
196    code, inst, args = code.strip(), inst.strip(), args.strip()
197
198    hexed = ','.join(dehex(x) for x in code.split(' '))
199    print '  ' + directive + '  ' + hexed + ' '*(36-len(hexed)) + \
200          comment + inst + (' '*(14-len(inst)) + args if args else '')
201
202sys.stdout = open(generated, 'w')
203
204print '''# Copyright 2017 Google Inc.
205#
206# Use of this source code is governed by a BSD-style license that can be
207# found in the LICENSE file.
208
209# This file is generated semi-automatically with this command:
210#   $ src/jumper/build_stages.py
211'''
212print '#if defined(__MACH__)'
213print '    #define HIDDEN .private_extern'
214print '    #define FUNCTION(name)'
215print '    #define BALIGN4  .align 2'
216print '    #define BALIGN8  .align 3'
217print '    #define BALIGN16 .align 4'
218print '    #define BALIGN32 .align 5'
219print '#else'
220print '    .section .note.GNU-stack,"",%progbits'
221print '    #define HIDDEN .hidden'
222print '    #define FUNCTION(name) .type name,%function'
223print '    #define BALIGN4  .balign 4'
224print '    #define BALIGN8  .balign 8'
225print '    #define BALIGN16 .balign 16'
226print '    #define BALIGN32 .balign 32'
227print '#endif'
228
229print '.text'
230print '#if defined(__x86_64__)'
231print 'BALIGN32'
232parse_object_file('merged.o', '.byte')
233
234print '#elif defined(__i386__)'
235print 'BALIGN32'
236parse_object_file('x86_merged.o', '.byte')
237
238print '#endif'
239
240sys.stdout = open(generated_win, 'w')
241print '''; Copyright 2017 Google Inc.
242;
243; Use of this source code is governed by a BSD-style license that can be
244; found in the LICENSE file.
245
246; This file is generated semi-automatically with this command:
247;   $ src/jumper/build_stages.py
248'''
249print 'IFDEF RAX'
250print "_text32 SEGMENT ALIGN(32) 'CODE'"
251print 'ALIGN 32'
252parse_object_file('win_merged.o',   'DB')
253
254print 'ELSE'
255print '.MODEL FLAT,C'
256print "_text32 SEGMENT ALIGN(32) 'CODE'"
257print 'ALIGN 32'
258parse_object_file('win_x86_merged.o', 'DB')
259
260print 'ENDIF'
261print 'END'
262