#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
9656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project
# SHA256 block procedure for ARMv4. May 2007.

# Performance is ~2x better than gcc 3.4 generated code and in "abso-
# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
# byte [on single-issue Xscale PXA250 core].
15656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project
# July 2010.
#
# Rescheduling for dual-issue pipeline resulted in 22% improvement on
# Cortex A8 core and ~20 cycles per processed byte.
2043c12e3d4f9bbbbd4a8ba7b149686437514bc6b6Brian Carlstrom
# February 2011.
#
# Profiler-assisted and platform-specific optimization resulted in 16%
# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# September 2013.
#
# Add NEON implementation. On Cortex A8 it was measured to process one
# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
# code (meaning that latter performs sub-optimally, nothing was done
# about it).
333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# May 2014.
#
# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
37392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Pick the output file from the command line: arguments are consumed one
# by one until one looks like a plain file name ("name.ext"); anything
# that does not match is discarded.  If no argument matches, $output ends
# up false and the generated code goes to the inherited STDOUT.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when a file name was actually found.  The
# three-argument form keeps the name from being parsed as a mode string,
# and a failure is reported instead of being silently ignored.
if (defined($output) && $output ne '') {
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}
40656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project
# Symbolic names for the ARM registers used by the integer code path.
# r0-r2 carry ctx, inp and len on entry to sha256_block_data_order; the
# temporaries $t0, $t4, $t1 and $t3 deliberately alias r0-r3.
$ctx="r0";	$t0="r0";
$inp="r1";	$t4="r1";
$len="r2";	$t1="r2";
$T1="r3";	$t3="r3";
# The eight SHA-256 working variables live in r4-r11.
$A="r4";
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
# @V is rotated after every emitted round, so the roles a..h move from
# register to register instead of the values being copied.
@V=($A,$B,$C,$D,$E,$F,$G,$H);
$t2="r12";
$Ktbl="r14";		# pointer into the K256 constant table

# Rotate/shift amounts for the four SHA-256 sigma functions.  In the
# lower-case pair the third entry is used as a logical shift (lsr), the
# others as rotates (ror).
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);
61656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project
# BODY_00_15 -- append the assembly for one SHA-256 round to $code.
#
# Arguments: ($i,$a,$b,$c,$d,$e,$f,$g,$h), where $i is the round index
# and $a..$h name the registers currently playing the roles of the eight
# working variables.
#
# For $i<16 an extra leading fragment is emitted.  It completes the fetch
# of input word $i into $t1 in big-endian order (a "rev" of the already
# loaded word when __ARM_ARCH__>=7, byte-by-byte assembly otherwise) and
# starts Sigma1(e).  BODY_16_XX supplies the equivalent lead-in itself
# and then calls this sub with $i>=16.
#
# The common fragment stores X[i] into the 16-word frame at sp, adds
# X[i], K256[i], Sigma1(e) and Ch(e,f,g) to h, adds h to d and adds
# Sigma0(a) to h.  Maj(a,b,c) is left in $t3; it is added to that
# register at the top of the next round ("from the past").
#
# Lines such as "#if $i==31" interpolate $i, so the generated file
# contains constant preprocessor conditions.  Text between backticks is
# emitted literally here.  NOTE(review): presumably a later pass outside
# this excerpt evaluates the backticked expressions -- confirm.
#
# No Perl comments may be placed inside the heredocs: every line there
# becomes part of the generated assembly.
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___ if ($i<16);
#if __ARM_ARCH__>=7
	@ ldr	$t1,[$inp],#4			@ $i
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
	rev	$t1,$t1
#else
	@ ldrb	$t1,[$inp,#3]			@ $i
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	ldrb	$t2,[$inp,#2]
	ldrb	$t0,[$inp,#1]
	orr	$t1,$t1,$t2,lsl#8
	ldrb	$t2,[$inp],#4
	orr	$t1,$t1,$t0,lsl#16
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	orr	$t1,$t1,$t2,lsl#24
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
#endif
___
$code.=<<___;
	ldr	$t2,[$Ktbl],#4			@ *K256++
	add	$h,$h,$t1			@ h+=X[i]
	str	$t1,[sp,#`$i%16`*4]
	eor	$t1,$f,$g
	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
	and	$t1,$t1,$e
	add	$h,$h,$t2			@ h+=K256[i]
	eor	$t1,$t1,$g			@ Ch(e,f,g)
	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
	add	$h,$h,$t1			@ h+=Ch(e,f,g)
#if $i==31
	and	$t2,$t2,#0xff
	cmp	$t2,#0xf2			@ done?
#endif
#if $i<15
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4			@ prefetch
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t2,$a,$b			@ a^b, b^c in next round
#else
	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
	eor	$t2,$a,$b			@ a^b, b^c in next round
	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
#endif
	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
	add	$d,$d,$h			@ d+=h
	eor	$t3,$t3,$b			@ Maj(a,b,c)
	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
___
	# Swap the two temporaries for the next round: the register that
	# now holds Maj(a,b,c) becomes $t2 (added "from the past") and the
	# one holding a^b becomes $t3 (the next round's b^c).
	($t2,$t3)=($t3,$t2);
}
127656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project
# BODY_16_XX -- append the assembly for one round that needs a freshly
# computed message-schedule word, then delegate to BODY_00_15.
#
# Arguments are the same as for BODY_00_15: ($i,$a..$h).
#
# On entry the generated code expects $t1 and $t4 to have been loaded
# already by the preceding round (see the commented-out ldr lines at the
# top of the heredoc).  It computes sigma0 of $t1 and sigma1 of $t4,
# adds the frame slots at ($i+0)%16 and ($i+9)%16, and leaves the new
# X[i] in $t1.  It also starts Sigma1(e), i.e. the part BODY_00_15 only
# emits itself for $i<16.
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___;
	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
	@ ldr	$t4,[sp,#`($i+14)%16`*4]
	mov	$t0,$t1,ror#$sigma0[0]
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	mov	$t2,$t4,ror#$sigma1[0]
	eor	$t0,$t0,$t1,ror#$sigma0[1]
	eor	$t2,$t2,$t4,ror#$sigma1[1]
	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
	ldr	$t1,[sp,#`($i+0)%16`*4]
	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
	ldr	$t4,[sp,#`($i+9)%16`*4]

	add	$t2,$t2,$t0
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
	add	$t1,$t1,$t2
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
	add	$t1,$t1,$t4			@ X[i]
___
	# The rest of the round is shared with rounds 0..15.
	&BODY_00_15(@_);
}
152656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project
# Start the generated file: the K256 round-constant table followed by
# the entry of sha256_block_data_order and the top of the per-block loop.
#
# Layout emitted here:
#   K256              64 32-bit constants (256 bytes), 32-byte aligned
#   .word 0           terminator
#   .LOPENSSL_armcap  offset of OPENSSL_armcap_P relative to the function
#   .align 5          pads up to the function entry
#
# The function recovers its own address into r3 ("sub r3,pc,#8") and uses
# it both to reach OPENSSL_armcap_P and to locate K256 at r3-(256+32).
# NOTE(review): the 256+32 distance relies on the table, the two extra
# words and the .align 5 padding adding up to 288 bytes -- keep them in
# step if anything between K256 and the function changes.
#
# When __ARM_ARCH__>=7 the code branches to .LARMv8 or .LNEON depending
# on the capability bits; those labels are not defined in this fragment.
# The integer path saves ctx, inp, inp+len and r4-r11,lr, loads the
# eight state words and reserves a 16-word frame for X[].
$code=<<___;
#include "arm_arch.h"

.text
.code	32

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
.align	5

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
	sub	r3,pc,#8		@ sha256_block_data_order
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
#if __ARM_ARCH__>=7
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
	sub	$Ktbl,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t3,$B,$C		@ magic
	eor	$t2,$t2,$t2
___
# Emit rounds 0..15 in full, then a single copy of rounds 16..31 under
# the label .Lrounds_16_xx.  That second group is what the generated
# "bne .Lrounds_16_xx" (appended right after these loops) branches back
# to.  After each round @V is rotated by one position, so the register
# that held h takes the role of a in the next round.  $i is a package
# variable and intentionally carries over from the first loop into the
# second.
for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".Lrounds_16_xx:\n";
for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
# Tail of the integer code path.  The flags tested by ldreq/bne come from
# the "cmp ...,#0xf2" that BODY_00_15 emits for round 31: the low byte of
# the last K256 word (0xc67178f2) marks the end of the table, otherwise
# the 16 rounds at .Lrounds_16_xx are executed again.
#
# Once all rounds are done the pending Maj(a,b,c) is added, the previous
# hash state is loaded from ctx and added to the working variables, and
# the result is stored back.  inp and inp+len are reloaded from the
# frame, Ktbl is rewound by 256 bytes and the loop repeats until inp
# reaches the end.  The frame (16 words of X[] plus the three saved
# argument words) is then dropped and r4-r11 restored; the return
# sequence depends on __ARM_ARCH__.
$code.=<<___;
	ldreq	$t3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
	ldr	$t0,[$t3,#0]
	ldr	$t1,[$t3,#4]
	ldr	$t2,[$t3,#8]
	add	$A,$A,$t0
	ldr	$t0,[$t3,#12]
	add	$B,$B,$t1
	ldr	$t1,[$t3,#16]
	add	$C,$C,$t2
	ldr	$t2,[$t3,#20]
	add	$D,$D,$t0
	ldr	$t0,[$t3,#24]
	add	$E,$E,$t1
	ldr	$t1,[$t3,#28]
	add	$F,$F,$t2
	ldr	$inp,[sp,#17*4]		@ pull inp
	ldr	$t2,[sp,#18*4]		@ pull inp+len
	add	$G,$G,$t0
	add	$H,$H,$t1
	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
	cmp	$inp,$t2
	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#`16+3`*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
___
######################################################################
# NEON stuff
#
2553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root{{{
# Lexicals shared by the NEON helpers that follow.
# @X holds the names of the four q registers carrying the message
# schedule; the helpers rotate this list after each step.
my @X=map("q$_",(0..3));
# Scratch registers: four q registers and two d registers.
my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
# $Xfer reuses the register named by $t4; the helpers store through it
# with post-increment.
my $Xfer=$t4;
my $j=0;
2603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# Dlo("qN") returns the name of the low d register of qN, i.e. "d(2N)";
# returns "" when the argument contains no q register name.  Callers in
# this file use the &Dlo(...) form, which bypasses the empty prototype.
sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
# Dhi("qN") returns the name of the high d register of qN, i.e.
# "d(2N+1)"; returns "" when the argument contains no q register name.
# Callers in this file use the &Dhi(...) form, which bypasses the empty
# prototype.
sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
2633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# AUTOLOAD -- turn a call to any undefined sub into one line of assembly
# appended to $code.
#
# The sub name becomes the mnemonic, with its first underscore replaced
# by a dot (&vadd_i32 -> "vadd.i32").  The arguments become the
# comma-separated operands.  Only the last argument is inspected: if it
# is a plain number it is emitted as an immediate with a leading "#".
sub AUTOLOAD()          # thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
  my $arg = pop;
    # numeric round-trip test: true only when $arg is already a number
    $arg = "#$arg" if ($arg*1 eq $arg);
    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}
2703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# Xupdate -- emit the NEON message-schedule update for four words,
# interleaved with four copies of the instruction list returned by the
# callback.
#
# Argument: a code reference.  It is called four times and each call is
# expected to return a list of strings of Perl code; the strings are
# eval'ed one at a time between the NEON instructions so the two
# instruction streams end up interleaved in $code.
#
# The NEON instructions themselves are emitted through undefined subs
# such as &vext_8, which AUTOLOAD turns into assembly text.  On return
# @X has been rotated by one element, so the vector just updated becomes
# the last one.
#
# The evals must stay inline in this sub, in this order: a string eval
# only sees the lexicals of the scope it is written in.
# NOTE(review): $a..$h look unused, but presumably the snippets returned
# by $body refer to them -- confirm against the callback before removing.
sub Xupdate()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T2,$T0,$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T1,$T0,$sigma0[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T2,$T0,32-$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T3,$T0,$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T2);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T3,$T0,32-$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);		# sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);		# sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 # drain the callback instructions, keeping the last two for
	 # after the store below
	 while($#insns>=2) { eval(shift(@insns)); }
	&vst1_32	("{$T0}","[$Xfer,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));

	push(@X,shift(@X));		# "rotate" X[]
}
3713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# Xpreload(\&body): emit scalar rounds interleaved with the NEON
# pre-processing of one 128-bit vector of the next input block.
#
# &$body is invoked four times and the code strings it returns are queued
# in @insns; every eval(shift(@insns)) executes the next queued string.
# Between those evals $T0 is loaded from [$Ktbl] (with write-back), @X[0]
# is byte-reversed within each 32-bit word (vrev32.8) and added to $T0.
# Once the remaining strings have been eval'ed the sum is stored through
# $Xfer (with write-back) and @X is rotated so that the next call operates
# on the next vector.
#
# NOTE(review): vld1_32/vrev32_8/vadd_i32/vst1_32 are defined outside this
# excerpt; presumably they append the corresponding instruction to $code.
3723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootsub Xpreload()
3733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root{ use integer;
3743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root  my $body = shift;
3753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root  my @insns = (&$body,&$body,&$body,&$body);
  # Lexicals for the eval'ed strings: the first snippet of every round
  # returned by body_00_15 assigns ($a..$h)=@V, and a string eval sees
  # the lexicals of the scope it is executed in.
3763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root  my ($a,$b,$c,$d,$e,$f,$g,$h);
3773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
3783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&vld1_32	("{$T0}","[$Ktbl,:128]!");
3833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&vrev32_8	(@X[0],@X[0]);
3883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 eval(shift(@insns));
3923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&vadd_i32	($T0,$T0,@X[0]);
3933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	 foreach (@insns) { eval; }	# remaining instructions
3943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&vst1_32	("{$T0}","[$Xfer,:128]!");
3953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
3963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	push(@X,shift(@X));		# "rotate" X[]
3973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root}
3983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# body_00_15(): return the list of Perl snippets that make up one scalar
# round.  Nothing is emitted here: the caller eval()s the strings one at a
# time so that it can slot its own instructions in between them.  Strings
# joined with "." form a single list element and are therefore always
# eval'ed together.
#
# The strings are single-quoted, so every variable in them ($j, @V,
# $t0..$t3, $Ktbl, @Sigma0, @Sigma1, $a..$h) is resolved at the eval site,
# not here.  The final snippet advances $j, rotates @V by moving its last
# element to the front, and swaps $t2/$t3 for the following round.
3993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootsub body_00_15 () {
4003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	(
4013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
4023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&add	($h,$h,$t1)',			# h+=X[i]+K[i]
4033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t1,$f,$g)',
4043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
4053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
4063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&and	($t1,$t1,$e)',
4073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
4083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
4093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t1,$t1,$g)',			# Ch(e,f,g)
4103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&add	($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
4113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t2,$a,$b)',			# a^b, b^c in next round
4123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
4133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g)
	# Fetch the operand for the next round into $t1: normally the next
	# stack slot, [$Ktbl] when $j==15, and [sp,#64] when $j==31.
4143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'.
4153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'.
4163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&ldr	($t1,"[sp,#64]")			if ($j==31)',
4173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b)
4183f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&add	($d,$d,$h)',			# d+=h
4193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&add	($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a)
4203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'&eor	($t3,$t3,$b)',			# Maj(a,b,c)
4213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
4223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	)
4233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root}
4243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# NEON entry point, emitted only for __ARM_ARCH__>=7.  The heredoc below
# produces the prologue:
#   - r4-r12 and lr are pushed, the incoming sp is kept in $t2, 16*4+16
#     bytes are reserved and sp is aligned down to 16 bytes;
#   - $Ktbl is derived from r3 (NOTE(review): what r3 holds on entry is
#     established outside this excerpt -- confirm);
#   - four vectors are loaded from $inp and four from $Ktbl, the input is
#     byte-reversed per 32-bit word, and the sums are stored at sp through
#     $Xfer, which is then rewound by 64;
#   - $ctx, $inp, $len and the original sp are saved at [sp,#64], #68, #72
#     and #76 respectively;
#   - $A-$H are loaded from $ctx, $t1 from [sp,#0], $t2 is zeroed and $t3
#     is set to $B^$C before branching to .L_00_48.
4253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___;
4263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#if __ARM_ARCH__>=7
4273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.fpu	neon
4283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
4293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.type	sha256_block_data_order_neon,%function
4303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align	4
4313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootsha256_block_data_order_neon:
4323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON:
4333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	stmdb	sp!,{r4-r12,lr}
4343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
4353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	mov	$t2,sp
4363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sub	sp,sp,#16*4+16		@ alloca
4373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sub	$Ktbl,r3,#256+32	@ K256
4383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	bic	sp,sp,#15		@ align for 128-bit stores
4393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
4403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[0]},[$inp]!
4413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[1]},[$inp]!
4423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[2]},[$inp]!
4433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[3]},[$inp]!
4443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$T0},[$Ktbl,:128]!
4453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$T1},[$Ktbl,:128]!
4463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$T2},[$Ktbl,:128]!
4473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$T3},[$Ktbl,:128]!
4483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@X[0],@X[0]		@ yes, even on
4493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str		$ctx,[sp,#64]
4503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@X[1],@X[1]		@ big-endian
4513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str		$inp,[sp,#68]
4523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	mov		$Xfer,sp
4533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@X[2],@X[2]
4543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str		$len,[sp,#72]
4553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@X[3],@X[3]
4563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str		$t2,[sp,#76]		@ save original sp
4573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$T0,$T0,@X[0]
4583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$T1,$T1,@X[1]
4593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vst1.32		{$T0},[$Xfer,:128]!
4603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$T2,$T2,@X[2]
4613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vst1.32		{$T1},[$Xfer,:128]!
4623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$T3,$T3,@X[3]
4633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vst1.32		{$T2},[$Xfer,:128]!
4643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vst1.32		{$T3},[$Xfer,:128]!
4653f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
4663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldmia		$ctx,{$A-$H}
4673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sub		$Xfer,$Xfer,#64
4683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr		$t1,[sp,#0]
4693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	eor		$t2,$t2,$t2
4703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	eor		$t3,$B,$C
4713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	b		.L_00_48
4723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
4733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align	4
4743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.L_00_48:
4753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___
# Body of the .L_00_48 loop: four Xupdate calls, one per vector in @X
# (the start of Xupdate is outside this excerpt; its visible tail rotates
# @X once per call just like Xpreload).  They are called with a leading
# "&", which bypasses the empty prototype declared on these subs.
4763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xupdate(\&body_00_15);
4773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xupdate(\&body_00_15);
4783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xupdate(\&body_00_15);
4793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xupdate(\&body_00_15);
# Loop control and hand-over to the final rounds.  The emitted code tests
# $t1 against 0 (the K256 terminator), reloads $t1 from [sp,#0], rewinds
# $Xfer by 64 and repeats while the terminator has not been reached.
# Afterwards it reloads $inp and the value saved from $len, rewinds $Ktbl
# by 256 and compares the two pointers: when they are equal $inp is moved
# back by 64 so that the four vld1.8 below stay inside the input ("avoid
# SEGV"), and the advanced $inp is written back only when they differ.
# The flags set by that teq are consumed again by the conditional
# instructions emitted after the Xpreload calls.
4803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___;
4813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	teq	$t1,#0				@ check for K256 terminator
4823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t1,[sp,#0]
4833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sub	$Xfer,$Xfer,#64
4843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	bne	.L_00_48
4853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
4863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr		$inp,[sp,#68]
4873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr		$t0,[sp,#72]
4883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl
4893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	teq		$inp,$t0
4903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	subeq		$inp,$inp,#64		@ avoid SEGV
4913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[0]},[$inp]!		@ load next input block
4923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[1]},[$inp]!
4933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[2]},[$inp]!
4943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@X[3]},[$inp]!
4953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	strne		$inp,[sp,#68]
4963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	mov		$Xfer,sp
4973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___
# Final rounds: four Xpreload calls emit sixteen scalar rounds (four per
# call) while the block that was just loaded is byte-reversed, added to
# the first four K256 vectors and stored on the stack for the next pass.
4983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xpreload(\&body_00_15);
4993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xpreload(\&body_00_15);
5003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xpreload(\&body_00_15);
5013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	&Xpreload(\&body_00_15);
# Accumulate and epilogue.  The eight words addressed by $t1 are added to
# $A-$H and the sums are stored back through $t1.
# NOTE(review): $t1 is presumably the ctx pointer reloaded from [sp,#64]
# by the round with $j==31 -- $j's starting value is outside this excerpt.
# The ne/eq conditions below still refer to the "teq $inp,$t0" emitted
# before the Xpreload calls; presumably none of the instructions generated
# in between updates the flags.  With more input pending the loop state
# is re-initialised and control returns to .L_00_48; otherwise the
# original sp is restored from [sp,#76] and the saved registers are popped
# with pc to return.
5023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___;
5033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t0,[$t1,#0]
5043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past
5053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t2,[$t1,#4]
5063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t3,[$t1,#8]
5073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t4,[$t1,#12]
5083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$A,$A,$t0			@ accumulate
5093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t0,[$t1,#16]
5103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$B,$B,$t2
5113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t2,[$t1,#20]
5123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$C,$C,$t3
5133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t3,[$t1,#24]
5143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$D,$D,$t4
5153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldr	$t4,[$t1,#28]
5163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$E,$E,$t0
5173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str	$A,[$t1],#4
5183f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$F,$F,$t2
5193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str	$B,[$t1],#4
5203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$G,$G,$t3
5213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str	$C,[$t1],#4
5223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	add	$H,$H,$t4
5233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	str	$D,[$t1],#4
5243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	stmia	$t1,{$E-$H}
5253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
5263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	movne	$Xfer,sp
5273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldrne	$t1,[sp,#0]
5283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	eorne	$t2,$t2,$t2
5293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldreq	sp,[sp,#76]			@ restore original sp
5303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	eorne	$t3,$B,$C
5313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	bne	.L_00_48
5323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
5333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ldmia	sp!,{r4-r12,pc}
5343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
5353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#endif
5363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___
5373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root}}}
5383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root######################################################################
5393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root# ARMv8 stuff
5403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#
# Code path built on the ARMv8 SHA-256 instructions.  The sha256*
# mnemonics written here are not handed to the assembler as-is: the
# output loop at the end of the file rewrites them through unsha256().
5413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root{{{
# Register allocation: q0-q2 hold the state halves and a scratch copy,
# q8-q11 the four message vectors, q12-q15 two K+W work registers and the
# saved state; r3 is the K256 pointer.
5423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
5433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy @MSG=map("q$_",(8..11));
5443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
5453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy $Ktbl="r3";
5463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# Entry and loop head: load the state from $ctx, derive $Ktbl from r3
# (NOTE(review): r3 presumably holds the address of
# sha256_block_data_order on entry -- set up outside this excerpt), then
# per block load and byte-reverse four message vectors, keep a copy of
# the state, and compare $inp with $len.  No instruction emitted after
# that teq updates the flags, so the "bne .Loop_v8" at the bottom still
# tests this comparison.
5473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___;
5483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#if __ARM_ARCH__>=7
5493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.type	sha256_block_data_order_armv8,%function
5503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align	5
5513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootsha256_block_data_order_armv8:
5523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LARMv8:
5533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32	{$ABCD,$EFGH},[$ctx]
5543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sub	$Ktbl,r3,#sha256_block_data_order-K256
5553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
5563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.Loop_v8:
5573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
5583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
5593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$W0},[$Ktbl]!
5603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@MSG[0],@MSG[0]
5613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@MSG[1],@MSG[1]
5623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@MSG[2],@MSG[2]
5633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vrev32.8	@MSG[3],@MSG[3]
5643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vmov		$ABCD_SAVE,$ABCD	@ offload
5653f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vmov		$EFGH_SAVE,$EFGH
5663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	teq		$inp,$len
5673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___
# Twelve unrolled groups that also update the message schedule
# (sha256su0/sha256su1).  Each group prefetches the next K256 vector into
# the work register that is not currently in use.
5683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootfor($i=0;$i<12;$i++) {
5693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___;
5703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$W1},[$Ktbl]!
5713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$W0,$W0,@MSG[0]
5723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256su0	@MSG[0],@MSG[1]
5733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vmov		$abcd,$ABCD
5743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h		$ABCD,$EFGH,$W0
5753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h2	$EFGH,$abcd,$W0
5763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256su1	@MSG[0],@MSG[2],@MSG[3]
5773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___
	# Swap the two work registers and rotate the message vectors so that
	# the same template addresses the next group.
5783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
5793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root}
# Four closing groups without schedule updates.  The last K256 load has
# no write-back, so $Ktbl has advanced by 15 vectors (256-16 bytes) and
# the "rewind" subtracts exactly that.  The saved state is then added
# back and the loop repeats until $inp reached $len; finally the state is
# stored to $ctx.  The output loop turns "ret" into "bx lr".
5803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___;
5813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$W1},[$Ktbl]!
5823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$W0,$W0,@MSG[0]
5833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vmov		$abcd,$ABCD
5843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h		$ABCD,$EFGH,$W0
5853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h2	$EFGH,$abcd,$W0
5863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
5873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$W0},[$Ktbl]!
5883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$W1,$W1,@MSG[1]
5893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vmov		$abcd,$ABCD
5903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h		$ABCD,$EFGH,$W1
5913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h2	$EFGH,$abcd,$W1
5923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
5933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vld1.32		{$W1},[$Ktbl]
5943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$W0,$W0,@MSG[2]
5953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sub		$Ktbl,$Ktbl,#256-16	@ rewind
5963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vmov		$abcd,$ABCD
5973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h		$ABCD,$EFGH,$W0
5983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h2	$EFGH,$abcd,$W0
5993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
6003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$W1,$W1,@MSG[3]
6013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vmov		$abcd,$ABCD
6023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h		$ABCD,$EFGH,$W1
6033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	sha256h2	$EFGH,$abcd,$W1
6043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
6053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE
6063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE
6073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	bne		.Loop_v8
6083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
6093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	vst1.32		{$ABCD,$EFGH},[$ctx]
6103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
6113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root	ret		@ bx lr
6123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
6133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#endif
6143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___
6153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root}}}
# File trailer: a NUL-terminated identification string, an alignment
# directive, and the OPENSSL_armcap_P symbol declared with .comm.  The
# "@" in the e-mail address is backslash-escaped because this heredoc
# interpolates and "@openssl" would otherwise be parsed as an array.
6163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___;
6173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
618221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.align	2
6193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.comm   OPENSSL_armcap_P,4,4
620656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project___
621656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project
# Hand-assembler for the ARMv8 SHA-256 instructions, for toolchains whose
# assembler does not know the mnemonics.
{   # Opcode words onto which the register numbers are OR-ed.
    my  %opcode = (
	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);

    # unsha256($mnemonic,$arg)
    #
    # $mnemonic	one of the keys of %opcode
    # $arg	operand text, "qD,qM" or "qD,qN,qM"
    #
    # Returns a ".byte" directive encoding the instruction, followed by
    # the original text as an assembler comment.  If the mnemonic is not
    # in %opcode or the operands are not two or three q-registers, the
    # instruction text is returned unchanged ("$mnemonic\t$arg"), so that
    # the assembler gets to see it instead of the line being dropped or
    # encoded from an undefined opcode.
    sub unsha256 {
	my ($mnemonic,$arg)=@_;

	if (exists $opcode{$mnemonic} &&
	    $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/) {
	    # The middle operand is absent in the two-operand form
	    # (sha256su0); it then contributes no bits.
	    my ($qd,$qn,$qm) = ($1, defined($2) ? $2 : 0, $3);

	    # Low three bits and bit 3 of every register number go to
	    # separate fields of the instruction word.
	    my $word = $opcode{$mnemonic}|(($qd&7)<<13)|(($qd&8)<<19)
					 |(($qn&7)<<17)|(($qn&8)<<4)
					 |(($qm&7)<<1) |(($qm&8)<<2);

	    # Emit individual bytes, least significant first, since ARMv7
	    # instructions are always encoded little-endian.  The correct
	    # solution is to use the .inst directive, but older assemblers
	    # don't implement it:-(
	    return sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
	}

	# Not something we know how to encode: pass it through untouched.
	return "$mnemonic\t$arg";
    }
}
6433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root
# Post-process the accumulated $code line by line and print it:
#   - text between backticks is evaluated as Perl and replaced by the
#     result;
#   - sha256* instructions with q-register operands are replaced by
#     whatever unsha256() returns for them;
#   - "ret" becomes "bx lr"; on lines without "ret" a literal "bx lr" is
#     replaced by its instruction word.
foreach (split($/,$code)) {

	s/\`([^\`]*)\`/eval $1/geo;

	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;

	s/\bret\b/bx	lr/go		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

# Closing enforces the flush.  Buffered write errors (full disk, closed
# pipe) only surface here, so fail loudly rather than leave a truncated
# assembly file behind with a zero exit status.
close STDOUT or die "error closing STDOUT: $!";
657