1221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#!/usr/bin/env perl
2221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#
3221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# ====================================================================
4221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# project. The module is, however, dual licensed under OpenSSL and
6221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# CRYPTOGAMS licenses depending on where you obtain it. For further
7221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# details see http://www.openssl.org/~appro/cryptogams/.
8221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# ====================================================================
9221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#
10221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# SHA512 block transform for x86. September 2007.
11221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#
12221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# Performance in clock cycles per processed byte (less is better):
13221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#
14221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#		Pentium	PIII	P4	AMD K8	Core2
15221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# gcc		100	75	116	54	66
16221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# icc		97	77	95	55	57
17221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# x86 asm	61	56	82	36	40
18221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# SSE2 asm	-	-	38	24	20
19221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# x86_64 asm(*)	-	-	30	10.0	10.5
20221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#
21221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# (*) x86_64 assembler performance is presented for reference
22221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#     purposes.
23221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#
24221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# IALU code-path is optimized for older Pentiums. On vanilla Pentium
25221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# performance improvement over compiler generated code reaches ~60%,
261762a559ef393f9c15300398433598989033385fDavid 'Digit' Turner# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
27221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# to 50%, but it's less important as they are expected to execute SSE2
28221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# code-path, which is commonly ~2-3x faster [than compiler generated
29221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# code]. SSE2 code-path is as fast as original sha512-sse2.pl, even
30221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# though it does not use 128-bit operations. The latter means that
31221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# SSE2-aware kernel is no longer required to execute the code. Another
32221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# difference is that new code optimizes amount of writes, but at the
33221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# cost of increased data cache "footprint" by 1/2KB.
34221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Capture the directory part of the path this script was invoked as
# (everything up to and including the last "/" or "\" in $0), then add
# that directory and its ../../perlasm sibling to @INC so that the
# perlasm support library x86asm.pl can be loaded.
35221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
36221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrompush(@INC,"${dir}","${dir}../../perlasm");
37221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromrequire "x86asm.pl";
38221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Initialise the generator.  The first command-line argument is passed
# through unchanged; the third parameter is true only when the LAST
# command-line argument is the literal string "386".
# NOTE(review): presumably $ARGV[0] selects the assembler flavour and the
# "386" flag restricts output to i386 instructions -- confirm in x86asm.pl.
39221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
40221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# The SSE2 code-path is generated only when -DOPENSSL_IA32_SSE2 appears
# somewhere on the command line (any argument matching is enough).
41221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$sse2=0;
42221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromfor (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
43221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# OPENSSL_ia32cap_P is declared external only in the SSE2 build; it is the
# symbol the generated function tests (bit 26) to choose a code-path.
44221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&external_label("OPENSSL_ia32cap_P") if ($sse2);
45221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Frame layout for the integer (x86) code-path.  Each 64-bit quantity is
# addressed as two 32-bit words relative to esp: the "lo" word at 8*n and
# the "hi" word at 8*n+4.  Slot 0 is the temporary T; the working
# variables A..H follow at offsets 8..64.
# NOTE(review): DWP/QWP come from x86asm.pl; they are used below as 32-bit
# and 64-bit memory operands respectively -- confirm exact semantics there.
46221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Tlo=&DWP(0,"esp");	$Thi=&DWP(4,"esp");
47221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Alo=&DWP(8,"esp");	$Ahi=&DWP(8+4,"esp");
48221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Blo=&DWP(16,"esp");	$Bhi=&DWP(16+4,"esp");
49221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Clo=&DWP(24,"esp");	$Chi=&DWP(24+4,"esp");
50221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Dlo=&DWP(32,"esp");	$Dhi=&DWP(32+4,"esp");
51221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Elo=&DWP(40,"esp");	$Ehi=&DWP(40+4,"esp");
52221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Flo=&DWP(48,"esp");	$Fhi=&DWP(48+4,"esp");
53221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Glo=&DWP(56,"esp");	$Ghi=&DWP(56+4,"esp");
54221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Hlo=&DWP(64,"esp");	$Hhi=&DWP(64+4,"esp");
# Register that walks the K512 constant table; both round bodies read the
# current constant through it and advance it by 8 per round.
55221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$K512="ebp";
56221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Frame layout for the SSE2 code-path: A..H as 64-bit slots at esp+0..56.
# Note this differs from the x86 layout above -- there is no T slot, so
# A starts at offset 0 rather than 8.
57221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Asse2=&QWP(0,"esp");
58221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Bsse2=&QWP(8,"esp");
59221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Csse2=&QWP(16,"esp");
60221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Dsse2=&QWP(24,"esp");
61221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Esse2=&QWP(32,"esp");
62221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Fsse2=&QWP(40,"esp");
63221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Gsse2=&QWP(48,"esp");
64221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$Hsse2=&QWP(56,"esp");
65221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Only a and e are kept permanently in MMX registers on the SSE2 path;
# the remaining variables are pulled from the frame slots when needed.
66221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$A="mm0";	# B-D and
67221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$E="mm4";	# F-H are commonly loaded to respectively mm1-mm3 and
68221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		# mm5-mm7, but it's done on on-demand basis...
69221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# BODY_00_15_sse2([$prefetch])
#
# Emits the instructions for ONE SHA-512 round on the MMX-register
# code-path.  Nothing is computed at generation time; each &insn(...) call
# appends one instruction, so the order of the calls below IS the emitted
# (hand-interleaved) schedule and must not be rearranged.
#
# On entry of the emitted code: a is in $A (mm0), e is in $E (mm4),
# b,c,d,f,g,h are in their $Bsse2..$Hsse2 frame slots, the message word
# for this round is at 8*9(%esp) and $K512 points at this round's constant.
#
# On exit of the emitted code: $A = T1+T2 (new a), $E = d+T1 (new e),
# esp has been lowered by 8, $K512 has been advanced by 8, and the low
# byte of edx holds the low byte of the constant just consumed (callers
# compare it against a fixed value to detect the end of their loop).
#
# $prefetch - optional; when true, two extra loads into mm6 and mm2 are
#             emitted for the message-schedule code that follows the call
#             in the 16_79 loop.
#
# The leading "&" on every call is deliberate: names such as "sub" are
# Perl keywords and can only be invoked as functions in the &name(...) form.
70221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromsub BODY_00_15_sse2 {
71221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    my $prefetch=shift;
72221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
73221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm5",$Fsse2);			# load f
74221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm6",$Gsse2);			# load g
75221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm7",$Hsse2);			# load h
76221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Sigma1(e) = ROTR14 ^ ROTR18 ^ ROTR41, built without a rotate
	# instruction: mm1 accumulates right shifts 14,18,41 while mm2
	# accumulates left shifts 23,46,50 (= 64-41, 64-18, 64-14); every
	# intermediate value is XORed into mm3.
77221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm1",$E);			# %mm1 is sliding right
78221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm2",$E);			# %mm2 is sliding left
79221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm1",14);
80221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($Esse2,$E);			# modulo-scheduled save e
81221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm2",23);
82221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm3","mm1");			# %mm3 is T1
83221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm1",4);
84221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm2");
85221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm2",23);
86221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm1");
87221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm1",23);
88221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm2");
89221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm2",4);
90221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm1");
91221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm7",QWP(0,$K512));		# h+=K512[i]
92221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm2");			# T1=Sigma1_512(e)
93221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Ch(e,f,g) computed as ((f^g)&e)^g; loads of b, c and d are
	# interleaved with it.  $E is overwritten with d here, after its
	# last use as e.
94221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm5","mm6");			# f^=g
95221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm1",$Bsse2);			# load b
96221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pand	("mm5",$E);			# f&=e
97221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm2",$Csse2);			# load c
98221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm5","mm6");			# f^=g
99221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($E,$Dsse2);			# e = load d
100221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm3","mm5");			# T1+=Ch(e,f,g)
101221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(0,"esp"),$A);		# modulo-scheduled save a
102221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm3","mm7");			# T1+=h
103221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Sigma0(a) = ROTR28 ^ ROTR34 ^ ROTR39, same sliding-shift scheme:
	# mm5 accumulates right shifts 28,34,39 and mm6 left shifts 25,30,36.
104221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm5",$A);			# %mm5 is sliding right
105221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm6",$A);			# %mm6 is sliding left
106221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm3",&QWP(8*9,"esp"));	# T1+=X[0]
107221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm5",28);
108221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	($E,"mm3");			# e += T1
109221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm6",25);
110221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm7","mm5");			# %mm7 is T2
111221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm5",6);
112221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm7","mm6");
113221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm6",5);
114221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm7","mm5");
115221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm5",5);
116221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm7","mm6");
117221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm6",6);
118221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm7","mm5");
	# Lowering esp by 8 renames every frame slot by one position: the a
	# stored at offset 0 above is now read back as b, the saved e as f,
	# and so on.  All esp-relative offsets after this line are relative
	# to the NEW esp.
119221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&sub	("esp",8);
120221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm7","mm6");			# T2=Sigma0_512(a)
121221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Maj(a,b,c) computed as (a&c)|((a|c)&b).  When $prefetch is set,
	# mm6 and mm2 are reloaded (once c is no longer needed) with the
	# two message words the following schedule code expects.
122221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm5",$A);			# %mm5=a
123221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&por	($A,"mm2");			# a=a|c
124221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm6",&QWP(8*(9+16-14),"esp"))	if ($prefetch);
125221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pand	("mm5","mm2");			# %mm5=a&c
126221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pand	($A,"mm1");			# a=(a|c)&b
127221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm2",&QWP(8*(9+16-1),"esp"))	if ($prefetch);
128221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&por	("mm5",$A);			# %mm5=(a&c)|((a|c)&b)
129221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm7","mm5");			# T2+=Maj(a,b,c)
130221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($A,"mm3");			# a=T1
131221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Read the low byte of the constant just used BEFORE advancing the
	# table pointer; the caller tests this byte as its loop condition.
132221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&LB("edx"),&BP(0,$K512));
133221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	($A,"mm7");			# a+=T2
134221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	($K512,8);
135221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
136221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# BODY_00_15_x86()
#
# Emits the instructions for ONE SHA-512 round using only 32-bit integer
# registers.  Takes no arguments.  As with the SSE2 variant, each
# &insn(...) call appends one instruction, so the call order below is the
# emitted schedule and must be kept as is.
#
# All eight working variables live in the frame ($Alo/$Ahi .. $Hlo/$Hhi);
# the message word for the round is read from 8*(9+15)(%esp) and the round
# constant through $K512.  Throughout, eax carries the low 32 bits and ebx
# the high 32 bits of the running 64-bit value (see the add/adc pairs);
# ecx/edx and esi/edi are lo/hi scratch pairs.
#
# Effect of the emitted code: d += T1 is written back to the D slot,
# Sigma0(a)+T1+Maj(a,b,c) is written to the T slot, the low byte of edx
# receives the low byte of the constant just used, esp is lowered by 8
# and $K512 is advanced by 8.  Because the T slot sits 8 bytes below A,
# lowering esp makes the value just stored in T the next round's a and
# shifts every other variable up by one name.
#
# The leading "&" on every call is deliberate: "and", "or", "xor" and
# "sub" are Perl operators/keywords and can only be called as &name(...).
137221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromsub BODY_00_15_x86 {
138221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
139221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
140221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	# ecx/edx are shifted right and esi/edi shifted left in cumulative
	# steps (the "14-9", "18-14", ... operands are the increments), and
	# each partial result is XORed into eax (LO) or ebx (HI) according
	# to the table above.
141221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",$Elo);
142221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",$Ehi);
143221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi","ecx");
144221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
14504ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("ecx",9);	# lo>>9
146221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi","edx");
14704ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("edx",9);	# hi>>9
148221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx","ecx");
149221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",14);	# lo<<14
150221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax","edx");
151221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",14);	# hi<<14
152221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","esi");
153221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
154221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",14-9);	# lo>>14
155221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");
156221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",14-9);	# hi>>14
157221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","ecx");
158221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",18-14);	# lo<<18
159221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edx");
160221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",18-14);	# hi<<18
161221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","esi");
162221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
163221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",18-14);	# lo>>18
164221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");
165221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",18-14);	# hi>>18
166221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","ecx");
167221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",23-18);	# lo<<23
168221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edx");
169221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",23-18);	# hi<<23
170221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","esi");
171221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edi");			# T1 = Sigma1(e)
172221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Accumulate T1 in eax/ebx with add/adc pairs while Ch(e,f,g) is
	# formed in ecx/edx.  The extra-indented lines are the T1 additions
	# interleaved into the Ch computation.
173221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",$Flo);
174221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",$Fhi);
175221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi",$Glo);
176221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi",$Ghi);
177221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 &add	("eax",$Hlo);
178221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 &adc	("ebx",$Hhi);			# T1 += h
179221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ecx","esi");
180221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("edx","edi");
181221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&and	("ecx",$Elo);
182221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&and	("edx",$Ehi);
183221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 &add	("eax",&DWP(8*(9+15)+0,"esp"));
184221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 &adc	("ebx",&DWP(8*(9+15)+4,"esp"));	# T1 += X[0]
185221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ecx","esi");
186221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("edx","edi");			# Ch(e,f,g) = (f^g)&e)^g
187221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Finish T1, park it in the T slot for later, then fold it into d.
188221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi",&DWP(0,$K512));
189221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi",&DWP(4,$K512));		# K[i]
190221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax","ecx");
191221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx","edx");			# T1 += Ch(e,f,g)
192221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",$Dlo);
193221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",$Dhi);
194221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax","esi");
195221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx","edi");			# T1 += K[i]
196221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	($Tlo,"eax");
197221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	($Thi,"ebx");			# put T1 away
198221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax","ecx");
199221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx","edx");			# d += T1
200221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
201221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
202221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
203221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	# The updated d (still in eax/ebx) is stored to the D slot while a
	# is being loaded; eax/ebx are then reused for Sigma0(a).
204221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",$Alo);
205221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",$Ahi);
206221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	($Dlo,"eax");
207221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	($Dhi,"ebx");
208221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi","ecx");
209221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
21004ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("ecx",2);	# lo>>2
211221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi","edx");
21204ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("edx",2);	# hi>>2
213221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx","ecx");
214221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",4);	# lo<<4
215221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax","edx");
216221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",4);	# hi<<4
217221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","esi");
218221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
219221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",7-2);	# lo>>7
220221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");
221221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",7-2);	# hi>>7
222221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","ecx");
223221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",25-4);	# lo<<25
224221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edx");
225221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",25-4);	# hi<<25
226221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","esi");
227221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
228221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",28-7);	# lo>>28
229221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edi");
230221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",28-7);	# hi>>28
231221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","ecx");
232221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",30-25);	# lo<<30
233221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edx");
234221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",30-25);	# hi<<30
235221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","esi");
236221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edi");			# Sigma0(a)
237221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Add the parked T1 back in while Maj(a,b,c) is formed in ecx/edx
	# from (a|b)&c in ecx/edx and a&b in esi/edi.
238221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",$Alo);
239221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",$Ahi);
240221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi",$Blo);
241221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi",$Bhi);
242221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax",$Tlo);
243221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx",$Thi);			# T1 = Sigma0(a)+T1
244221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&or	("ecx","esi");
245221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&or	("edx","edi");
246221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&and	("ecx",$Clo);
247221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&and	("edx",$Chi);
248221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&and	("esi",$Alo);
249221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&and	("edi",$Ahi);
250221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&or	("ecx","esi");
251221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&or	("edx","edi");			# Maj(a,b,c) = ((a|b)&c)|(a&b)
252221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# The final sum goes to the T slot; after esp is lowered below, that
	# slot is addressed as A, i.e. this value becomes the next a.
253221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax","ecx");
254221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx","edx");			# T1 += Maj(a,b,c)
255221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	($Tlo,"eax");
256221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	($Thi,"ebx");
257221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# The byte is read before K is advanced, so dl identifies the
	# constant that was just consumed.  lea (rather than add) is used
	# for the pointer bump.
	# NOTE(review): presumably lea is chosen so the flags are left
	# untouched / for pairing -- confirm with the caller's loop test.
258221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&LB("edx"),&BP(0,$K512));	# pre-fetch LSB of *K
259221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&sub	("esp",8);
260221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&lea	($K512,&DWP(8,$K512));		# K++
261221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
262221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
263221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
264221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&function_begin("sha512_block_data_order");
265221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi",wparam(0));	# ctx
266221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi",wparam(1));	# inp
267221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax",wparam(2));	# num
268221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx","esp");		# saved sp
269221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
270221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&call	(&label("pic_point"));	# make it PIC!
271221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("pic_point");
272221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&blindpop($K512);
273221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&lea	($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
274221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
275221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&sub	("esp",16);
276221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&and	("esp",-64);
277221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
278221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("eax",7);
279221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax","edi");
280221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(0,"esp"),"esi");	# ctx
281221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(4,"esp"),"edi");	# inp
282221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8,"esp"),"eax");	# inp+num*128
283221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(12,"esp"),"ebx");	# saved sp
284221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
285221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromif ($sse2) {
286221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
287221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bt	(&DWP(0,"edx"),26);
288221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&jnc	(&label("loop_x86"));
289221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
290221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	# load ctx->h[0-7]
291221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($A,&QWP(0,"esi"));
292221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm1",&QWP(8,"esi"));
293221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm2",&QWP(16,"esi"));
294221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm3",&QWP(24,"esi"));
295221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($E,&QWP(32,"esi"));
296221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm5",&QWP(40,"esi"));
297221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm6",&QWP(48,"esi"));
298221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm7",&QWP(56,"esi"));
299221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&sub	("esp",8*10);
300221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
301221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("loop_sse2",16);
302221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	# &movq	($Asse2,$A);
303221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($Bsse2,"mm1");
304221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($Csse2,"mm2");
305221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($Dsse2,"mm3");
306221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	# &movq	($Esse2,$E);
307221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($Fsse2,"mm5");
308221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($Gsse2,"mm6");
309221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	($Hsse2,"mm7");
310221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
311221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",&DWP(0,"edi"));
312221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",&DWP(4,"edi"));
313221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("edi",8);
314221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("ecx");
315221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("edx");
316221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8*9+4,"esp"),"ecx");
317221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8*9+0,"esp"),"edx");
318221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
319221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("00_14_sse2",16);
320221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax",&DWP(0,"edi"));
321221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx",&DWP(4,"edi"));
322221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("edi",8);
323221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("eax");
324221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("ebx");
325221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8*8+4,"esp"),"eax");
326221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8*8+0,"esp"),"ebx");
327221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
328221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&BODY_00_15_sse2();
329221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
330221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&cmp	(&LB("edx"),0x35);
331221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&jne	(&label("00_14_sse2"));
332221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
333221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&BODY_00_15_sse2(1);
334221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
335221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("16_79_sse2",16);
336221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#&movq	("mm2",&QWP(8*(9+16-1),"esp"));	#prefetched in BODY_00_15
337221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#&movq	("mm6",&QWP(8*(9+16-14),"esp"));
338221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm1","mm2");
339221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
340221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm2",1);
341221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm7","mm6");
342221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm6",6);
343221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm3","mm2");
344221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
345221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm2",7-1);
346221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm5","mm6");
347221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm6",19-6);
348221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm2");
349221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
350221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm2",8-7);
351221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm5","mm6");
352221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psrlq	("mm6",61-19);
353221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm2");
354221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
355221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm2",&QWP(8*(9+16),"esp"));
356221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
357221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm1",56);
358221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm5","mm6");
359221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm7",3);
360221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm1");
361221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
362221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm2",&QWP(8*(9+16-9),"esp"));
363221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
364221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm1",63-56);
365221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm5","mm7");
366221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&psllq	("mm7",45-3);
367221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm3","mm1");
368221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&pxor	("mm5","mm7");
369221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
370221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm3","mm5");
371221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm3","mm2");
372221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(8*9,"esp"),"mm3");
373221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
374221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&BODY_00_15_sse2(1);
375221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
376221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&cmp	(&LB("edx"),0x17);
377221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&jne	(&label("16_79_sse2"));
378221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
379221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	# &movq	($A,$Asse2);
380221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm1",$Bsse2);
381221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm2",$Csse2);
382221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm3",$Dsse2);
383221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	# &movq	($E,$Esse2);
384221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm5",$Fsse2);
385221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm6",$Gsse2);
386221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	("mm7",$Hsse2);
387221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
388221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	($A,&QWP(0,"esi"));
389221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm1",&QWP(8,"esi"));
390221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm2",&QWP(16,"esi"));
391221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm3",&QWP(24,"esi"));
392221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	($E,&QWP(32,"esi"));
393221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm5",&QWP(40,"esi"));
394221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm6",&QWP(48,"esi"));
395221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&paddq	("mm7",&QWP(56,"esi"));
396221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
397221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(0,"esi"),$A);
398221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(8,"esi"),"mm1");
399221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(16,"esi"),"mm2");
400221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(24,"esi"),"mm3");
401221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(32,"esi"),$E);
402221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(40,"esi"),"mm5");
403221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(48,"esi"),"mm6");
404221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&movq	(&QWP(56,"esi"),"mm7");
405221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
406221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("esp",8*80);			# destroy frame
407221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&sub	($K512,8*80);			# rewind K
408221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
409221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&cmp	("edi",&DWP(8*10+8,"esp"));	# are we done yet?
410221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&jb	(&label("loop_sse2"));
411221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
412221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&emms	();
413221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esp",&DWP(8*10+12,"esp"));	# restore sp
414221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&function_end_A();
415221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
# loop_x86: top of the per-block loop for the integer-only x86 code path.
# The Perl for-loop below runs when this generator script executes, so the
# load/bswap/push group appears eight times in the output (8 x 16 bytes =
# one 128-byte input block read through edi).
416221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("loop_x86",16);
417221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    # copy input block to stack reversing byte and qword order
	# Each group handles two big-endian qwords: bswap fixes the byte order
	# inside every dword, and pushing the dwords in memory order leaves the
	# first input qword at the highest stack address.
418221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    for ($i=0;$i<8;$i++) {
419221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax",&DWP($i*16+0,"edi"));
420221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx",&DWP($i*16+4,"edi"));
421221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",&DWP($i*16+8,"edi"));
422221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",&DWP($i*16+12,"edi"));
423221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("eax");
424221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("ebx");
425221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("ecx");
426221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&bswap	("edx");
427221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&push	("eax");
428221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&push	("ebx");
429221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&push	("ecx");
430221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&push	("edx");
431221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    }
	# Advance the input pointer past the block just copied, reserve nine
	# qwords below the 16 message qwords, and store the advanced pointer in
	# the dword at 8*(9+16)+4, i.e. just above those 16 qwords. The block
	# epilogue reads it back as "inp" from 8*(9+16+80)+4.
	# NOTE(review): that offset implies the frame grows by 80 qwords during
	# the rounds (presumably inside BODY_00_15_x86) -- confirm there.
432221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("edi",128);
433221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&sub	("esp",9*8);		# place for T,A,B,C,D,E,F,G,H
434221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8*(9+16)+4,"esp"),"edi");
435221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
436221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	# copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
	# rep movsd moves ecx=16 dwords (64 bytes) from [esi] to [edi]=esp+8,
	# which skips the T slot at esp+0. esi is reloaded with ctx at the
	# bottom of this loop; on first entry it is presumably set up before
	# this point (not visible in this part of the file).
	# 0xA5F3F689 is the byte sequence 89 F6 F3 A5 when emitted little-endian:
	# "mov esi,esi" (a two-byte filler) followed by "rep movsd".
437221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&lea	("edi",&DWP(8,"esp"));
438221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",16);
439221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xA5F3F689);		# rep movsd
440221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# 00_15_x86: loop over the first group of rounds, which consume the message
# qwords copied to the stack by the block prologue. BODY_00_15_x86 (defined
# outside this section) generates the body of one round.
# The loop exits when dl == 0x94, which is the low byte of the 16th K512
# entry (0xcf692694).
# NOTE(review): this relies on BODY_00_15_x86 leaving the low byte of the
# round constant it just used in dl -- confirm against its definition.
441221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("00_15_x86",16);
442221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&BODY_00_15_x86();
443221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
444221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&cmp	(&LB("edx"),0x94);
445221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&jne	(&label("00_15_x86"));
446221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# 16_79_x86: loop over the remaining rounds. Each pass first extends the
# message schedule,
#	X[0] = sigma1(X[-2]) + sigma0(X[-15]) + X[-16] + X[-7]
# using 32-bit register pairs (eax = low half, ebx = high half of the
# running sum), stores X[0], and then runs the same round body as above.
# Relative to esp, X[-k] is read from 8*(9+15+k), so older schedule words
# sit at higher addresses and X[0] is written to 8*(9+15).
447221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("16_79_x86",16);
448221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
449221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
450221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
	# ecx/esi both start as the low dword and edx/edi as the high dword of
	# X[-15]: ecx/edx take the right shifts, esi/edi the left shifts.
451221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",&DWP(8*(9+15+16-1)+0,"esp"));
452221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",&DWP(8*(9+15+16-1)+4,"esp"));
453221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi","ecx");
454221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Shifts are applied incrementally: each register is shifted by the
	# difference from its previous amount (7-1, 8-7, 31-24, ...), and the
	# trailing comment on each line gives the resulting cumulative shift.
45504ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("ecx",1);	# lo>>1
456221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi","edx");
45704ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("edx",1);	# hi>>1
458221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax","ecx");
459221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",24);	# lo<<24
460221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx","edx");
461221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",24);	# hi<<24
462221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","esi");
463221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
464221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",7-1);	# lo>>7
465221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");
466221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",7-1);	# hi>>7
467221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","ecx");
468221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",31-24);	# lo<<31
469221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edx");
470221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",25-24);	# hi<<25
471221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","esi");
472221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
473221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",8-7);	# lo>>8
474221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");
475221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",8-7);	# hi>>8
476221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","ecx");
477221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",31-25);	# hi<<31
478221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edx");
479221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");			# T1 = sigma0(X[-15])
480221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Park sigma0 in the qword at esp+0 so that all six working registers
	# are free again for the sigma1 computation.
481221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(0,"esp"),"eax");
482221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(4,"esp"),"ebx");		# put T1 away
483221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
484221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
485221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
486221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	#	HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
	# Same register roles and incremental-shift scheme as sigma0, now
	# applied to X[-2]. ROTR by 61 is realised as the cross-half terms
	# lo<<3 / hi>>29 (low result) and hi<<3 / lo>>29 (high result).
487221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",&DWP(8*(9+15+16-14)+0,"esp"));
488221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",&DWP(8*(9+15+16-14)+4,"esp"));
489221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi","ecx");
490221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
49104ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("ecx",6);	# lo>>6
492221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi","edx");
49304ef91b390dfcc6125913e2f2af502d23d7a5112Brian Carlstrom	&shr	("edx",6);	# hi>>6
494221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax","ecx");
495221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",3);	# lo<<3
496221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx","edx");
497221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",3);	# hi<<3
498221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","esi");
499221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
500221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",19-6);	# lo>>19
501221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edi");
502221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",19-6);	# hi>>19
503221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","ecx");
504221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("esi",13-3);	# lo<<13
505221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","edx");
506221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",13-3);	# hi<<13
507221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","esi");
508221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
509221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("ecx",29-19);	# lo>>29
510221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");
511221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shr	("edx",29-19);	# hi>>29
512221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("ebx","ecx");
513221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&shl	("edi",26-13);	# hi<<26
514221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edx");
515221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&xor	("eax","edi");			# sigma1(X[-2])
516221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Sum the four schedule terms as 64-bit values: every add on the low
	# half is immediately followed by adc on the high half, so the carry
	# from the low dword propagates. The loads in between do not touch
	# the flags.
517221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",&DWP(8*(9+15+16)+0,"esp"));
518221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",&DWP(8*(9+15+16)+4,"esp"));
519221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax",&DWP(0,"esp"));
520221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx",&DWP(4,"esp"));		# T1 = sigma1(X[-2])+T1
521221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi",&DWP(8*(9+15+16-9)+0,"esp"));
522221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi",&DWP(8*(9+15+16-9)+4,"esp"));
523221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax","ecx");
524221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx","edx");			# T1 += X[-16]
525221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax","esi");
526221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx","edi");			# T1 += X[-7]
527221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8*(9+15)+0,"esp"),"eax");
528221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP(8*(9+15)+4,"esp"),"ebx");	# save X[0]
529221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
530221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&BODY_00_15_x86();
531221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Exit when dl == 0x17, the low byte of the last (80th) K512 entry
	# (0x4a475817).
	# NOTE(review): relies on BODY_00_15_x86 leaving the low byte of the
	# round constant it just used in dl -- confirm against its definition.
532221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&cmp	(&LB("edx"),0x17);
533221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&jne	(&label("16_79_x86"));
534221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Per-block epilogue of the integer-only x86 path: add the working variables
# back into the hash state, drop the stack frame, and either loop for the
# next input block or return.
	# ctx and inp are the two dwords just above the 9+16+80 qword frame.
535221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esi",&DWP(8*(9+16+80)+0,"esp"));# ctx
536221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edi",&DWP(8*(9+16+80)+4,"esp"));# inp
	# This Perl loop runs at generation time and unrolls four groups; each
	# group updates two 64-bit words of ctx with add/adc pairs, taking the
	# addends from the eight qwords starting at esp+8 (esp+0 is the T slot).
	# The mov stores between the pairs leave the carry flag untouched.
537221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    for($i=0;$i<4;$i++) {
538221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("eax",&DWP($i*16+0,"esi"));
539221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ebx",&DWP($i*16+4,"esi"));
540221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("ecx",&DWP($i*16+8,"esi"));
541221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("edx",&DWP($i*16+12,"esi"));
542221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("eax",&DWP(8+($i*16)+0,"esp"));
543221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("ebx",&DWP(8+($i*16)+4,"esp"));
544221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP($i*16+0,"esi"),"eax");
545221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP($i*16+4,"esi"),"ebx");
546221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("ecx",&DWP(8+($i*16)+8,"esp"));
547221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&adc	("edx",&DWP(8+($i*16)+12,"esp"));
548221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP($i*16+8,"esi"),"ecx");
549221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	(&DWP($i*16+12,"esi"),"edx");
550221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    }
	# After the frame is dropped, esp points at the ctx/inp dword pair, so
	# [esp+8] and [esp+12] are the next two saved dwords. The K pointer is
	# moved back by 80 qwords, the size of the K512 table.
551221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&add	("esp",8*(9+16+80));		# destroy frame
552221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&sub	($K512,8*80);			# rewind K
553221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
	# Unsigned compare of the advanced input pointer against [esp+8]
	# (presumably the end-of-input address saved by the function prologue,
	# which is outside this section); loop while edi is below it.
554221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&cmp	("edi",&DWP(8,"esp"));		# are we done yet?
555221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&jb	(&label("loop_x86"));
556221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
557221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&mov	("esp",&DWP(12,"esp"));		# restore sp
558221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&function_end_A();
559221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# K512: table of the 80 64-bit SHA-512 round constants (FIPS 180-4), placed
# after the function's return sequence and before function_end_B.
# Each data_word line holds one constant as (low 32 bits, high 32 bits);
# for example the first line is the constant 0x428a2f98d728ae22.
# The round loops of the x86 path test dl against 0x94 and 0x17, which are
# the low bytes of the 16th and 80th entries below, so the order and values
# of the entries must not change.
# NOTE(review): the second set_label argument (64) is presumably the label
# alignment -- confirm against the perlasm helper.
560221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&set_label("K512",64);	# Yes! I keep it in the code segment!
561221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xd728ae22,0x428a2f98);	# u64
562221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x23ef65cd,0x71374491);	# u64
563221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xec4d3b2f,0xb5c0fbcf);	# u64
564221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x8189dbbc,0xe9b5dba5);	# u64
565221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xf348b538,0x3956c25b);	# u64
566221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xb605d019,0x59f111f1);	# u64
567221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xaf194f9b,0x923f82a4);	# u64
568221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xda6d8118,0xab1c5ed5);	# u64
569221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xa3030242,0xd807aa98);	# u64
570221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x45706fbe,0x12835b01);	# u64
571221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x4ee4b28c,0x243185be);	# u64
572221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xd5ffb4e2,0x550c7dc3);	# u64
573221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xf27b896f,0x72be5d74);	# u64
574221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x3b1696b1,0x80deb1fe);	# u64
575221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x25c71235,0x9bdc06a7);	# u64
576221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xcf692694,0xc19bf174);	# u64
577221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x9ef14ad2,0xe49b69c1);	# u64
578221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x384f25e3,0xefbe4786);	# u64
579221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x8b8cd5b5,0x0fc19dc6);	# u64
580221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x77ac9c65,0x240ca1cc);	# u64
581221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x592b0275,0x2de92c6f);	# u64
582221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x6ea6e483,0x4a7484aa);	# u64
583221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xbd41fbd4,0x5cb0a9dc);	# u64
584221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x831153b5,0x76f988da);	# u64
585221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xee66dfab,0x983e5152);	# u64
586221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x2db43210,0xa831c66d);	# u64
587221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x98fb213f,0xb00327c8);	# u64
588221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xbeef0ee4,0xbf597fc7);	# u64
589221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x3da88fc2,0xc6e00bf3);	# u64
590221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x930aa725,0xd5a79147);	# u64
591221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xe003826f,0x06ca6351);	# u64
592221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x0a0e6e70,0x14292967);	# u64
593221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x46d22ffc,0x27b70a85);	# u64
594221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x5c26c926,0x2e1b2138);	# u64
595221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x5ac42aed,0x4d2c6dfc);	# u64
596221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x9d95b3df,0x53380d13);	# u64
597221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x8baf63de,0x650a7354);	# u64
598221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x3c77b2a8,0x766a0abb);	# u64
599221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x47edaee6,0x81c2c92e);	# u64
600221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x1482353b,0x92722c85);	# u64
601221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x4cf10364,0xa2bfe8a1);	# u64
602221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xbc423001,0xa81a664b);	# u64
603221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xd0f89791,0xc24b8b70);	# u64
604221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x0654be30,0xc76c51a3);	# u64
605221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xd6ef5218,0xd192e819);	# u64
606221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x5565a910,0xd6990624);	# u64
607221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x5771202a,0xf40e3585);	# u64
608221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x32bbd1b8,0x106aa070);	# u64
609221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xb8d2d0c8,0x19a4c116);	# u64
610221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x5141ab53,0x1e376c08);	# u64
611221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xdf8eeb99,0x2748774c);	# u64
612221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xe19b48a8,0x34b0bcb5);	# u64
613221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xc5c95a63,0x391c0cb3);	# u64
614221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xe3418acb,0x4ed8aa4a);	# u64
615221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x7763e373,0x5b9cca4f);	# u64
616221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xd6b2b8a3,0x682e6ff3);	# u64
617221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x5defb2fc,0x748f82ee);	# u64
618221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x43172f60,0x78a5636f);	# u64
619221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xa1f0ab72,0x84c87814);	# u64
620221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x1a6439ec,0x8cc70208);	# u64
621221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x23631e28,0x90befffa);	# u64
622221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xde82bde9,0xa4506ceb);	# u64
623221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xb2c67915,0xbef9a3f7);	# u64
624221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xe372532b,0xc67178f2);	# u64
625221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xea26619c,0xca273ece);	# u64
626221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x21c0c207,0xd186b8c7);	# u64
627221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xcde0eb1e,0xeada7dd6);	# u64
628221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xee6ed178,0xf57d4f7f);	# u64
629221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x72176fba,0x06f067aa);	# u64
630221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xa2c898a6,0x0a637dc5);	# u64
631221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xbef90dae,0x113f9804);	# u64
632221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x131c471b,0x1b710b35);	# u64
633221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x23047d84,0x28db77f5);	# u64
634221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x40c72493,0x32caab7b);	# u64
635221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x15c9bebc,0x3c9ebe0a);	# u64
636221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x9c100d4c,0x431d67c4);	# u64
637221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xcb3e42b6,0x4cc5d4be);	# u64
638221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0xfc657e2a,0x597f299c);	# u64
639221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x3ad6faec,0x5fcb6fab);	# u64
640221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	&data_word(0x4a475817,0x6c44198c);	# u64
# End of sha512_block_data_order, followed by an identification string and
# the generator trailer.
# NOTE(review): asm_finish presumably flushes the generated assembly --
# confirm against the perlasm helper.
641221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&function_end_B("sha512_block_data_order");
642221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
643221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
644221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom&asm_finish();
645