#!/usr/bin/env perl

# Work out the directory this script was invoked from so the perlasm
# helper library can be located next to it.  When $0 carries no directory
# component the match fails; fall back to an empty prefix instead of
# reading a capture variable that was never set.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The first command-line argument is handed to the perlasm back end.
&asm_init($ARGV[0],"x86cpuid");

# Emit the SSE2 register wipe only when the build defines OPENSSL_IA32_SSE2.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
10c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# OPENSSL_ia32_cpuid
#
# Emits a routine that probes the processor with CPUID and returns an
# adjusted capability vector in edx:eax:
#	eax - EDX of CPUID leaf 1 after the adjustments below
#	edx - ECX of CPUID leaf 1 after the adjustments below
# Both are zero when EFLAGS bit 21 cannot be toggled (no CPUID).
#
# Adjustments made to the raw leaf 1 values:
#	* EDX bits 20 and 30 are forced to 0, then bit 30 is set on Intel
#	  CPUs and bit 20 on Intel family 15 (to engage RC4_CHAR);
#	* EDX bit 28 (hyper-threading) is cleared when the core/logical
#	  processor counts indicate the threads are not shared;
#	* ECX bit 11 is replaced by the AMD XOP flag taken from ECX of
#	  extended leaf 0x80000001;
#	* AVX/FMA/XOP (and, if XMM state is not enabled either, AESNI,
#	  PCLMULQDQ and FXSR) are cleared when OSXSAVE/XCR0 show that the
#	  OS does not preserve the corresponding register state.
#
# Register roles inside the routine:
#	edi - max standard CPUID level, later cores-1 per L1D (or -1)
#	ebp - vendor mismatch flag (0 on Intel), later the ECX result word
#	esi - scratch on the AMD path, later the EDX result word
#
# NOTE(review): function_begin/function_end presumably emit the
# prologue/epilogue that preserves the callee-saved registers used here;
# confirm against x86asm.pl.
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Try to flip EFLAGS bit 21 (ID); if it sticks, CPUID is available.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");		# ecx = bits that actually changed
	&xor	("eax","eax");
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));	# returns with eax = edx = 0
	&cpuid	();			# leaf 0: max level + vendor string
	&mov	("edi","eax");		# max value for standard query level

	# ebp accumulates a mismatch flag against "GenuineIntel".
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));	# ZF comes from the or above

	# esi accumulates a mismatch flag against "AuthenticAMD".
	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));	# neither vendor: take the common path

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();			# eax = max extended level
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");		# remember max extended level
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# isolate XOP bit (bit 0 keeps ebp non-zero)
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	&shr	("ebx",16);
	&and	("ebx",0xff);		# value compared against the core count
	&cmp	("ebx","esi");
	&ja	(&label("generic"));	# more logical processors than cores
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("edi",-1);		# mov leaves the flags from cmp intact
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("edi","eax");
	&shr	("edi",14);
	&and	("edi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);	# tentatively clear it
	&cmp	("edi",0);
	&je	(&label("generic"));	# edi == 0: leave the bit cleared

	&or	("edx",0x10000000);	# otherwise restore it ...
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));	# ... and keep it if the count is > 1
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# esi = EDX result word
	&or	("ebp","ecx");		# merge AMD XOP flag

	&bt	("ecx",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");		# select XCR0
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);		# keep XCR0 bits 1 and 2
	&cmp	("eax",6);
	&je	(&label("done"));	# both set: nothing to mask
	&cmp	("eax",2);
	&je	(&label("clear_avx"));	# only bit 1 set: drop AVX-class bits
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");
141c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# The capability vector is defined outside this module; the routines
# below read its first dword.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc
#
# Emits a routine that returns the time-stamp counter in edx:eax, or zero
# in both registers when bit 4 of the first capability dword is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");		# default result: 0
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);	# bit 4 gates the rdtsc below
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");
154c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# OPENSSL_instrument_halt
#
# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# Emits a routine that returns, in edx:eax, the number of time-stamp
# counter ticks that elapsed across a single halt instruction.  Zero is
# returned in both registers when bit 4 of the first capability dword is
# clear, when the low two bits of %cs are non-zero, or when EFLAGS bit 9
# shows that interrupts are disabled.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);		# low two selector bits = privilege level
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# stash the starting counter on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit difference: end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the stashed counter
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");
188c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# OPENSSL_far_spin
#
# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# Arguments (stack): 1st - segment selector loaded into %ds,
#                    2nd - offset of the dword to watch.
# Returns in eax the number of loop iterations until the watched dword
# changes; eax and edx are zero when EFLAGS bit 9 shows that interrupts
# are disabled.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");		# each emitter call is its own statement
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");		# spin counter
	&mov	("edx",&DWP(0,"ecx"));	# initial value of the watched dword
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));	# keep counting until the value changes

	&data_word (0x1f909090);	# pop	%ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");
224c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# OPENSSL_wipe_cpu
#
# Emits a routine that zeroes eax and edx, optionally clears xmm0-xmm7
# (only generated when the build passes -DOPENSSL_IA32_SSE2 and only
# executed when the SSE2 and FXSR capability bits are both set), zaps the
# x87/MMX register bank, and returns in eax the address just above its
# own return address (esp+4 at the time of return).
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx = first capability dword
	# ecx now holds the capability *value*, so the bit test must be done
	# on the register; using it as a memory operand would dereference the
	# capability word as if it were an address.
	# NOTE(review): bit 1 of CPUID.1:EDX is VME per the Intel SDM, while
	# the x87 FPU flag is bit 0 - confirm which bit is intended here.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");
252c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# OPENSSL_atomic_add
#
# Emits a routine that atomically adds the 2nd stack argument to the
# dword addressed by the 1st stack argument, using a lock cmpxchg retry
# loop, and returns the new value in eax.  ebx is saved and restored
# around the loop.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));	# current value = expected value
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));	# ebx = expected + increment
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));	# lost the race: retry with reloaded eax
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");
268c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# OPENSSL_indirect_call
#
# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#	type OPENSSL_indirect_call(void *f,...);
#	...
#	OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)	it's designed to work even for __fastcall if number of
#	arguments is 1 or 2!
#
# The emitted routine copies $max argument slots below a fresh frame
# (the first two are also left in ecx and edx), calls through the
# pointer passed as the 1st argument, and then restores esp from ebp so
# the result is the same whether or not the callee popped its arguments.
&function_begin_B("OPENSSL_indirect_call");
	{
	my $max = 7;		# $max has to be chosen as 4*n-1
				# in order to preserve eventual
				# stack alignment
	&push	("ebp");
	&mov	("ebp","esp");
	&sub	("esp",$max*4);
	&mov	("ecx",&DWP(12,"ebp"));	# 1st forwarded argument
	&mov	(&DWP(0,"esp"),"ecx");
	&mov	("edx",&DWP(16,"ebp"));	# 2nd forwarded argument
	&mov	(&DWP(4,"esp"),"edx");
	for my $slot (2 .. $max-1)
		{
		# Some copies will be redundant/bogus...
		&mov	("eax",&DWP(12+$slot*4,"ebp"));
		&mov	(&DWP(0+$slot*4,"esp"),"eax");
		}
	&call_ptr	(&DWP(8,"ebp"));# make the call...
	&mov	("esp","ebp");	# ... and just restore the stack pointer
				# without paying attention to what we called,
				# (__cdecl *func) or (__stdcall *one).
	&pop	("ebp");
	&ret	();
	}
&function_end_B("OPENSSL_indirect_call");
307c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# OPENSSL_cleanse
#
# Emits a routine that overwrites a buffer with zero bytes.
#	wparam(0) - address of the buffer
#	wparam(1) - number of bytes to clear
# Lengths below 7 are cleared byte by byte.  Longer buffers are first
# advanced bytewise to a 4-byte boundary, then cleared a dword at a time,
# with any remaining tail handled by the byte loop.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");		# the fill value
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));	# nothing to do for an empty buffer
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags from sub intact
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");	# clear leading unaligned bytes one by one
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes still left?
	&lea	("edx",&DWP(4,"edx"));	# lea keeps the flags from test intact
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));	# 1..3 trailing bytes
	&ret	();
&function_end_B("OPENSSL_cleanse");
341480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org
# OPENSSL_ia32_rdrand
#
# Emits a routine that tries rdrand up to 8 times and returns the value
# in eax.  When eax ends up zero, the remaining retry counter (ecx) is
# returned instead, so the result is zero only after all attempts were
# used up without the carry flag signalling success.
&function_begin_B("OPENSSL_ia32_rdrand");
	&mov	("ecx",8);		# retry budget
&set_label("loop");
	&rdrand	("eax");
	&jc	(&label("break"));	# carry set: eax holds a valid value
	&loop	(&label("loop"));	# decrement ecx and retry while non-zero
&set_label("break");
	&cmp	("eax",0);
	&cmove	("eax","ecx");
	&ret	();
&function_end_B("OPENSSL_ia32_rdrand");
3532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# Register OPENSSL_cpuid_setup with the perlasm init-segment helper, then
# let the back end finish the generated output.
&initseg("OPENSSL_cpuid_setup");

&asm_finish();
357