1#!/usr/bin/env perl
2
# Usage: <this script> [flavour] [output]
#
# flavour is handed through to x86_64-xlate.pl, as is the output file name.
# When the first argument contains a dot it is taken to be the output file
# name and no flavour is passed on.
$flavour = shift;
$output  = shift;
if (defined($flavour) && $flavour =~ /\./) { $output = $flavour; undef $flavour; }

# The Win64 calling convention is selected by a nasm/masm/mingw64 flavour or
# by an output file name ending in .asm.
$win64 = 0;
$win64 = 1 if ((defined($flavour) && $flavour =~ /[nm]asm|mingw64/) ||
	       (defined($output)  && $output  =~ /\.asm$/));

# Look for the translator next to this script first, then in a perlasm/
# subdirectory.  $dir is empty when $0 carries no directory component
# (instead of depending on whatever an unsuccessful match left in $1).
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The command is still a single shell string, so the interpreter, translator
# and output paths are double-quoted to survive embedded spaces, and absent
# arguments are left out rather than passed as empty words.
{
    my @xlate_cmd = ("\"$^X\"", "\"$xlate\"");
    push(@xlate_cmd, $flavour)      if (defined($flavour) && $flavour ne "");
    push(@xlate_cmd, "\"$output\"") if (defined($output)  && $output  ne "");
    open OUT, "| " . join(" ", @xlate_cmd)
	or die "can't call $xlate: $!";
}
*STDOUT=*OUT;

# Integer argument registers, in order, for the selected calling convention.
($arg1,$arg2,$arg3,$arg4) = $win64 ? ("%rcx","%rdx","%r8", "%r9")	# Win64 order
				   : ("%rdi","%rsi","%rdx","%rcx");	# Unix order
19
# Start-up hook and capability vector: a call to OPENSSL_cpuid_setup is
# placed in the .init section, and OPENSSL_ia32cap_P is declared as a hidden
# 8-byte common symbol before switching to .text.
print <<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,8,4

.text

___

# OPENSSL_atomic_add: adds the 32-bit value in the second argument to the
# 32-bit word addressed by the first argument, retrying a locked cmpxchg
# until it succeeds, and returns the new value sign-extended to 64 bits.
print <<___;
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc: executes rdtsc and returns %edx:%eax combined into %rax.
print <<___;
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid: runs CPUID and returns an adjusted copy of the leaf-1
# feature words, %ecx in the upper and %edx in the lower half of %rax.
# Adjustments made on the way (see the per-line comments in the assembly):
#   - the vendor string is compared against "GenuineIntel"/"AuthenticAMD";
#   - on AMD the XOP flag from leaf 0x80000001 is merged in as bit 11 of
#     the %ecx copy;
#   - the hyper-threading bit (28) of %edx is cleared depending on the
#     core / logical-processor counts;
#   - reserved bits 20 and 30 of %edx are forced to 0, then bit 30 is set on
#     Intel CPUs and bit 20 on Intel family 15;
#   - the AVX, FMA and XOP bits are cleared unless OSXSAVE is set and XCR0
#     reports both XMM and YMM state.
# %rbx is preserved by parking it in %r8 for the duration.
print <<___;
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse: stores zeros over the buffer addressed by the first
# argument, the second argument being the byte count.  Counts below 15 are
# cleared one byte at a time (a count of 0 returns immediately).  Longer
# buffers are cleared bytewise until the pointer is 8-byte aligned, then
# 8 bytes per iteration, and any remaining tail goes back through the byte
# loop.
print <<___;
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___
211
# OPENSSL_wipe_cpu: zeroes a set of XMM and general-purpose registers and
# returns 8(%rsp), i.e. the address just above its own return address, in
# %rax.  The register set depends on the calling convention: the Unix build
# scrubs %xmm0-%xmm15 plus %rcx,%rdx,%rsi,%rdi,%r8-%r11, while the Win64
# build limits itself to %xmm0-%xmm5 and leaves %rsi/%rdi alone (those, like
# %xmm6-%xmm15, are callee-saved under the Microsoft x64 convention).
{
    my @xmm_numbers = $win64 ? (0..5) : (0..15);
    my @gp_names    = $win64 ? qw(rcx rdx r8 r9 r10 r11)
			     : qw(rcx rdx rsi rdi r8 r9 r10 r11);

    print <<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
___
    # One "pxor reg,reg" / "xorq reg,reg" line per register, in list order.
    print map { sprintf("\tpxor\t%%xmm%d,%%xmm%d\n", $_, $_) } @xmm_numbers;
    print map { sprintf("\txorq\t%%%s,%%%s\n", $_, $_) } @gp_names;
    print <<___;
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
266
# OPENSSL_ia32_rdrand: issues rdrand into %rax up to 8 times (%ecx is the
# loop counter) and stops at the first attempt that sets the carry flag.
# If %rax is zero at that point it is replaced by whatever is left in the
# counter %rcx, which has reached 0 once all attempts are used up.
{
    my $rdrand_asm = <<"___";
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___
    print $rdrand_asm;
}
283
# Flush the generated code.  STDOUT is a pipe at this point, so close also
# waits for the child process; a false return means either a write/close
# error ($! set) or a non-zero exit of the child ($! is 0, status in $?).
# Fail loudly instead of leaving a truncated or missing output file behind.
close STDOUT
    or die($! ? "error closing STDOUT: $!"
	      : "error closing STDOUT: child wait status $?");
285