1#!/usr/bin/env perl
2
# Command line: [flavour] [output].  If the first argument contains a dot it
# is treated as the output file name and no flavour is used.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are selected for nasm/masm/mingw64 flavours or
# when the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in a perlasm/
# subdirectory of the script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The translator path is quoted so a directory name containing spaces does
# not split the command line; $flavour and $output stay unquoted so that an
# undefined value does not turn into an empty argument.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers holding the first four integer arguments for the selected ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
19
print<<___;
.text

# OPENSSL_ia32_cpuid
#
# In:   first argument = pointer to three 32-bit words (kept in %rdi).
# Out:  0(%rdi) = CPUID leaf 1 EDX, adjusted as described in the body
#       4(%rdi) = CPUID leaf 1 ECX, with bit 11 replaced by the AMD XOP flag
#       8(%rdi) = CPUID leaf 7 sub-leaf 0 EBX, or 0 if leaf 7 is unsupported
# Regs: %r8   = saved %rbx (cpuid overwrites %ebx)
#       %r9d  = vendor scratch (0 means Intel), later the copy of ECX
#       %r10d = core-count scratch, later the copy of EDX
#       %r11d = highest supported standard CPUID leaf
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	# On Windows the first argument is in rcx, which cpuid clobbers. So make
	# Windows use the same register as Unix (rdi).
	mov	$arg1,%rdi
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear 3rd word
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	# Compare the vendor string in %ebx:%edx:%ecx against "GenuineIntel".
	# Each setne yields 0 or 1, so %r9d ends up 0 (Intel) or 1 (not Intel).
	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	# Same test against "AuthenticAMD"; anything that is neither Intel nor
	# AMD takes the .Lintel path with %r9d left at 1.
	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	# See http://developer.amd.com/wordpress/media/2012/10/254811.pdf (1)

	mov	\$0x80000000,%eax
	cpuid
	# Returns "The largest CPUID extended function input value supported by
	# the processor implementation." in EAX.
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	# Returns feature bits in ECX. See page 20 of [1].
	# This must be an OR rather than a MOV: %r9d is 1 here (not Intel) and
	# the 0x801 mask below keeps that bit, so %r9d stays non-zero for the
	# Intel test at .Lnocacheinfo if one of the branches to .Lintel is taken.
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# keep non-Intel marker and AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	# Returns APIC ID and number of cores in ECX. See page 27 of [1].
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	# See page 13 of [1].
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric		# more logical processors than cores: keep bit
	and	\$0xefffffff,%edx	# Clear hyper-threading bit.
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d		# mov leaves the flags from cmp intact
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	# Gets feature information. See table 3-21 in the Intel manual.
	and	\$0xbfefffff,%edx	# force reserved bits 30 and 20 to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28) - clear hyper-threading.
	cmp	\$0,%r10d
	je	.Lgeneric		# L1D not shared between cores: leave it clear

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	# Query the extended feature flags here, on the path shared by every
	# vendor, so that they are reported for AMD processors as well. The
	# leaf 1 results already live in %r9d:%r10d, so cpuid may clobber
	# %eax, %ebx, %ecx and %edx freely.
	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx		# sub-leaf 0
	cpuid
	mov	%ebx,8(%rdi)		# save extended feature flags
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	movl	%r9d,4(%rdi)
	movl	%r10d,0(%rdi)
	mov	%r8,%rbx		# restore %rbx
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___
162
# Flush the generated assembly and wait for the translator.  Closing a piped
# handle returns false when the child exits with non-zero status (with $! set
# to 0 if that was the only problem), so report that instead of exiting 0.
close STDOUT
    or die "error closing STDOUT: " . ($! ? $! : "translator exited with status $?");
164