#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA256 block procedure for ARMv4. May 2007.

# Performance is ~2x better than gcc 3.4 generated code and in "abso-
# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
# byte [on single-issue Xscale PXA250 core].

# July 2010.
#
# Rescheduling for dual-issue pipeline resulted in 22% improvement on
# Cortex A8 core and ~20 cycles per processed byte.

# February 2011.
#
# Profiler-assisted and platform-specific optimization resulted in 16%
# improvement on Cortex A8 core and ~15.4 cycles per processed byte.

# September 2013.
#
# Add NEON implementation. On Cortex A8 it was measured to process one
# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
# code (meaning that latter performs sub-optimally, nothing was done
# about it).

# May 2014.
#
# Add ARMv8 code path performing at 2.0 cpb on Apple A7.

# Command line: discard leading arguments until one looks like an output
# file name ("name.ext"). The loop ends either on that argument or when the
# argument list is exhausted (leaving $output false).
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when a file name was actually found; otherwise the
# generated code keeps going to the inherited STDOUT. A failure to create
# the requested file is reported instead of being silently ignored.
if ($output) {
	open STDOUT,">",$output or die "can't open $output: $!";
}

# Register allocation. Several names deliberately alias the same register:
# the argument registers (ctx, inp, len) are reused as temporaries.
$ctx="r0";	$t0="r0";
$inp="r1";	$t4="r1";
$len="r2";	$t1="r2";
$T1="r3";	$t3="r3";
# Eight SHA-256 working variables a..h.
$A="r4";
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
# @V is rotated by the callers after every round so that each round body
# sees the working variables under their current roles.
@V=($A,$B,$C,$D,$E,$F,$G,$H);
$t2="r12";
$Ktbl="r14";

# Rotate/shift amounts used by the round bodies (third entry of the
# lower-case sets is applied with lsr/vshr rather than ror).
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);

# BODY_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one SHA-256 round to $code.
#   $i       round index; interpolated into the emitted "#if $i==..." lines,
#            so those conditions are decided when the generated file is
#            preprocessed, not here.
#   $a..$h   register names currently holding the eight working variables.
#
# For $i<16 an extra prologue is emitted that finishes fetching the input
# word (a single load plus "rev" when __ARM_ARCH__>=7, byte-by-byte assembly
# otherwise) and starts Sigma1(e). The Maj(a,b,c) term is not added in the
# round that computes it; it is left in a temporary and added at the start
# of the following round ("from the past").
#
# Backtick-delimited expressions inside the heredocs are plain text at this
# point; presumably they are evaluated by a later pass over $code that is
# not part of this excerpt.
#
# Side effect: the globals $t2 and $t3 are exchanged on return, so that the
# next round finds a^b (this round's $t2) in the register it uses as b^c.
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___ if ($i<16);
#if __ARM_ARCH__>=7
	@ ldr	$t1,[$inp],#4			@ $i
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
	rev	$t1,$t1
#else
	@ ldrb	$t1,[$inp,#3]			@ $i
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	ldrb	$t2,[$inp,#2]
	ldrb	$t0,[$inp,#1]
	orr	$t1,$t1,$t2,lsl#8
	ldrb	$t2,[$inp],#4
	orr	$t1,$t1,$t0,lsl#16
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	orr	$t1,$t1,$t2,lsl#24
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
#endif
___
$code.=<<___;
	ldr	$t2,[$Ktbl],#4			@ *K256++
	add	$h,$h,$t1			@ h+=X[i]
	str	$t1,[sp,#`$i%16`*4]
	eor	$t1,$f,$g
	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
	and	$t1,$t1,$e
	add	$h,$h,$t2			@ h+=K256[i]
	eor	$t1,$t1,$g			@ Ch(e,f,g)
	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
	add	$h,$h,$t1			@ h+=Ch(e,f,g)
#if $i==31
	and	$t2,$t2,#0xff
	cmp	$t2,#0xf2			@ done?
#endif
#if $i<15
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4			@ prefetch
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t2,$a,$b			@ a^b, b^c in next round
#else
	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
	eor	$t2,$a,$b			@ a^b, b^c in next round
	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
#endif
	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
	add	$d,$d,$h			@ d+=h
	eor	$t3,$t3,$b			@ Maj(a,b,c)
	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
___
	# Exchange the roles of the two temporaries for the next round.
	($t2,$t3)=($t3,$t2);
}

# BODY_16_XX($i,$a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one round that first extends the message
# schedule and then runs the common round body. Takes the same arguments as
# BODY_00_15 and forwards them unchanged.
#
# The emitted code expects X[i+1] in $t1 and X[i+14] in $t4 already; the two
# commented-out loads at the top record that, the actual loads having been
# issued at the end of the previous round. It combines sigma0(X[i+1]),
# sigma1(X[i+14]), X[i] and X[i+9] into the new X[i] in $t1, and starts
# Sigma1(e) exactly as the $i<16 prologue of BODY_00_15 does.
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___;
	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
	@ ldr	$t4,[sp,#`($i+14)%16`*4]
	mov	$t0,$t1,ror#$sigma0[0]
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	mov	$t2,$t4,ror#$sigma1[0]
	eor	$t0,$t0,$t1,ror#$sigma0[1]
	eor	$t2,$t2,$t4,ror#$sigma1[1]
	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
	ldr	$t1,[sp,#`($i+0)%16`*4]
	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
	ldr	$t4,[sp,#`($i+9)%16`*4]

	add	$t2,$t2,$t0
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
	add	$t1,$t1,$t2
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
	add	$t1,$t1,$t4			@ X[i]
___
	&BODY_00_15(@_);
}

# Start of the generated file: the K256 constant table, a zero terminator
# word, the pc-relative offset to OPENSSL_armcap_P, and the entry point of
# the integer-only implementation. On __ARM_ARCH__>=7 the entry code tests
# the capability word and branches to .LARMv8 or .LNEON (labels emitted
# outside this excerpt). r3 is set to the address of the function itself;
# the table address is derived from it as r3-(256+32), i.e. the 256-byte
# table plus the 32 bytes occupied by the two words and the .align 5
# padding that follow it.
$code=<<___;
#include "arm_arch.h"

.text
.code	32

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
.align	5

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
	sub	r3,pc,#8		@ sha256_block_data_order
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
#if __ARM_ARCH__>=7
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
	sub	$Ktbl,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t3,$B,$C		@ magic
	eor	$t2,$t2,$t2
___
# Rounds 0..15 are emitted once. Sixteen BODY_16_XX rounds follow under the
# .Lrounds_16_xx label and are re-executed at run time until the round with
# $i==31 sees the low byte 0xf2 of the last K256 word (0xc67178f2).
# @V is rotated by one position after every round.
for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".Lrounds_16_xx:\n";
for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
# Block epilogue: add the previous hash value from ctx to the working
# variables, store the result, and loop while input remains.
$code.=<<___;
	ldreq	$t3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
	ldr	$t0,[$t3,#0]
	ldr	$t1,[$t3,#4]
	ldr	$t2,[$t3,#8]
	add	$A,$A,$t0
	ldr	$t0,[$t3,#12]
	add	$B,$B,$t1
	ldr	$t1,[$t3,#16]
	add	$C,$C,$t2
	ldr	$t2,[$t3,#20]
	add	$D,$D,$t0
	ldr	$t0,[$t3,#24]
	add	$E,$E,$t1
	ldr	$t1,[$t3,#28]
	add	$F,$F,$t2
	ldr	$inp,[sp,#17*4]		@ pull inp
	ldr	$t2,[sp,#18*4]		@ pull inp+len
	add	$G,$G,$t0
	add	$H,$H,$t1
	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
	cmp	$inp,$t2
	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#`16+3`*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
___
######################################################################
# NEON stuff
#
{{{
# Four q registers hold the 16-word message schedule window; @X is rotated
# by Xupdate/Xpreload after each group of four words.
my @X=map("q$_",(0..3));
my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
# Pointer used for the vst1 stores of X[i]+K[i]; shares r1 with $inp/$t4.
my $Xfer=$t4;
my $j=0;

# Dlo("qN") returns the name of the low d half of a q register, "d(2N)";
# returns "" when the argument does not contain a q register name.
sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
# Dhi("qN") returns the name of the high d half of a q register, "d(2N+1)";
# returns "" when the argument does not contain a q register name.
sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }

# Any call to an undefined sub lands here and is turned into one line of
# assembly appended to $code: the sub name becomes the mnemonic (package
# qualifier stripped, first "_" replaced by "."), the arguments become the
# comma-separated operands. A last argument that is a plain number gets a
# "#" prefix so it is emitted as an immediate.
sub AUTOLOAD()          # thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
  my $arg = pop;
    $arg = "#$arg" if ($arg*1 eq $arg);
    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}

# Xupdate($body)
#
# Emits the NEON code that computes the next four message schedule words
# into @X[0], adds the next four K256 words and stores the sums through
# $Xfer, interleaved with the scalar instructions of four rounds.
#   $body   code ref returning a list of Perl source strings, one per
#           scalar instruction (or small group); it is called four times
#           and the strings are eval'ed here one by one between the NEON
#           instructions.
# The lexicals $a..$h are declared so that the eval'ed strings, which
# assign and use them, resolve to this sub's own variables.
# @X is rotated by one q register on return.
sub Xupdate()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T2,$T0,$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T1,$T0,$sigma0[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T2,$T0,32-$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T3,$T0,$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T2);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T3,$T0,32-$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);		# sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);		# sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 # Drain all but the last two scalar instructions before the store.
	 while($#insns>=2) { eval(shift(@insns)); }
	&vst1_32	("{$T0}","[$Xfer,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));

	push(@X,shift(@X));		# "rotate" X[]
}

# Xpreload($body)
#
# Counterpart of Xupdate for words that need no schedule computation:
# byte-reverses the four words in @X[0] (vrev32.8), adds the next four K256
# words and stores the sums through $Xfer, again interleaved with the
# eval'ed scalar instructions of four rounds supplied by $body.
# @X is rotated by one q register on return.
sub Xpreload()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vrev32_8	(@X[0],@X[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 foreach (@insns) { eval; }	# remaining instructions
	&vst1_32	("{$T0}","[$Xfer,:128]!");

	push(@X,shift(@X));		# "rotate" X[]
}

sub body_00_15 () {
	(
	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
	'&add ($h,$h,$t1)',			# h+=X[i]+K[i]
	'&eor ($t1,$f,$g)',
	'&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
	'&add ($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
	'&and ($t1,$t1,$e)',
	'&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
	'&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
	'&eor ($t1,$t1,$g)',			# Ch(e,f,g)
	'&add ($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
	'&eor ($t2,$a,$b)',			# a^b, b^c in next round
	'&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
	'&add ($h,$h,$t1)',			# h+=Ch(e,f,g)
	'&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
	'&ldr ($t1,"[$Ktbl]") if ($j==15);'.
	'&ldr ($t1,"[sp,#64]") if ($j==31)',
	'&and ($t3,$t3,$t2)',			# (b^c)&=(a^b)
	'&add ($d,$d,$h)',			# d+=h
	'&add ($h,$h,$t0,"ror#$Sigma0[0]");'.
# h+=Sigma0(a) 4203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root '&eor ($t3,$t3,$b)', # Maj(a,b,c) 4213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' 4223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ) 4233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root} 4243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 4263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#if __ARM_ARCH__>=7 4273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.fpu neon 4283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.type sha256_block_data_order_neon,%function 4303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align 4 4313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootsha256_block_data_order_neon: 4323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON: 4333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root stmdb sp!,{r4-r12,lr} 4343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $t2,sp 4363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub sp,sp,#16*4+16 @ alloca 4373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $Ktbl,r3,#256+32 @ K256 4383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bic sp,sp,#15 @ align for 128-bit stores 4393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[0]},[$inp]! 4413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[1]},[$inp]! 4423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[2]},[$inp]! 4433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[3]},[$inp]! 4443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$T0},[$Ktbl,:128]! 4453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$T1},[$Ktbl,:128]! 4463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$T2},[$Ktbl,:128]! 
4473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$T3},[$Ktbl,:128]! 4483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @X[0],@X[0] @ yes, even on 4493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $ctx,[sp,#64] 4503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @X[1],@X[1] @ big-endian 4513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $inp,[sp,#68] 4523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $Xfer,sp 4533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @X[2],@X[2] 4543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $len,[sp,#72] 4553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @X[3],@X[3] 4563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $t2,[sp,#76] @ save original sp 4573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $T0,$T0,@X[0] 4583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $T1,$T1,@X[1] 4593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {$T0},[$Xfer,:128]! 4603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $T2,$T2,@X[2] 4613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {$T1},[$Xfer,:128]! 4623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $T3,$T3,@X[3] 4633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {$T2},[$Xfer,:128]! 4643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {$T3},[$Xfer,:128]! 
4653f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia $ctx,{$A-$H} 4673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $Xfer,$Xfer,#64 4683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t1,[sp,#0] 4693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root eor $t2,$t2,$t2 4703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root eor $t3,$B,$C 4713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root b .L_00_48 4723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align 4 4743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.L_00_48: 4753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___ 4763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xupdate(\&body_00_15); 4773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xupdate(\&body_00_15); 4783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xupdate(\&body_00_15); 4793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xupdate(\&body_00_15); 4803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 4813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root teq $t1,#0 @ check for K256 terminator 4823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t1,[sp,#0] 4833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $Xfer,$Xfer,#64 4843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .L_00_48 4853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $inp,[sp,#68] 4873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t0,[sp,#72] 4883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl 4893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root teq $inp,$t0 4903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subeq $inp,$inp,#64 @ avoid SEGV 4913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[0]},[$inp]! 
@ load next input block 4923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[1]},[$inp]! 4933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[2]},[$inp]! 4943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@X[3]},[$inp]! 4953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root strne $inp,[sp,#68] 4963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $Xfer,sp 4973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___ 4983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xpreload(\&body_00_15); 4993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xpreload(\&body_00_15); 5003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xpreload(\&body_00_15); 5013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root &Xpreload(\&body_00_15); 5023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 5033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t0,[$t1,#0] 5043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $A,$A,$t2 @ h+=Maj(a,b,c) from the past 5053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t2,[$t1,#4] 5063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t3,[$t1,#8] 5073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t4,[$t1,#12] 5083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $A,$A,$t0 @ accumulate 5093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t0,[$t1,#16] 5103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $B,$B,$t2 5113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t2,[$t1,#20] 5123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $C,$C,$t3 5133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t3,[$t1,#24] 5143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $D,$D,$t4 5153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr $t4,[$t1,#28] 5163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $E,$E,$t0 5173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $A,[$t1],#4 5183f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $F,$F,$t2 
5193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $B,[$t1],#4 5203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $G,$G,$t3 5213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $C,[$t1],#4 5223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $H,$H,$t4 5233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root str $D,[$t1],#4 5243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root stmia $t1,{$E-$H} 5253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movne $Xfer,sp 5273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldrne $t1,[sp,#0] 5283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root eorne $t2,$t2,$t2 5293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldreq sp,[sp,#76] @ restore original sp 5303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root eorne $t3,$B,$C 5313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .L_00_48 5323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia sp!,{r4-r12,pc} 5343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.size sha256_block_data_order_neon,.-sha256_block_data_order_neon 5353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#endif 5363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___ 5373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root}}} 5383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root###################################################################### 5393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root# ARMv8 stuff 5403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root# 5413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root{{{ 5423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); 5433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy @MSG=map("q$_",(8..11)); 5443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); 5453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootmy $Ktbl="r3"; 
5463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 5483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#if __ARM_ARCH__>=7 5493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.type sha256_block_data_order_armv8,%function 5503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align 5 5513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootsha256_block_data_order_armv8: 5523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LARMv8: 5533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$ABCD,$EFGH},[$ctx] 5543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $Ktbl,r3,#sha256_block_data_order-K256 5553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.Loop_v8: 5573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@MSG[0]-@MSG[1]},[$inp]! 5583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.8 {@MSG[2]-@MSG[3]},[$inp]! 5593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$W0},[$Ktbl]! 5603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @MSG[0],@MSG[0] 5613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @MSG[1],@MSG[1] 5623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @MSG[2],@MSG[2] 5633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vrev32.8 @MSG[3],@MSG[3] 5643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $ABCD_SAVE,$ABCD @ offload 5653f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $EFGH_SAVE,$EFGH 5663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root teq $inp,$len 5673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___ 5683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootfor($i=0;$i<12;$i++) { 5693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 5703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$W1},[$Ktbl]! 
5713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $W0,$W0,@MSG[0] 5723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256su0 @MSG[0],@MSG[1] 5733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $abcd,$ABCD 5743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h $ABCD,$EFGH,$W0 5753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h2 $EFGH,$abcd,$W0 5763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256su1 @MSG[0],@MSG[2],@MSG[3] 5773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___ 5783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); 5793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root} 5803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 5813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$W1},[$Ktbl]! 5823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $W0,$W0,@MSG[0] 5833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $abcd,$ABCD 5843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h $ABCD,$EFGH,$W0 5853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h2 $EFGH,$abcd,$W0 5863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$W0},[$Ktbl]! 
5883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $W1,$W1,@MSG[1] 5893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $abcd,$ABCD 5903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h $ABCD,$EFGH,$W1 5913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h2 $EFGH,$abcd,$W1 5923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$W1},[$Ktbl] 5943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $W0,$W0,@MSG[2] 5953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $Ktbl,$Ktbl,#256-16 @ rewind 5963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $abcd,$ABCD 5973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h $ABCD,$EFGH,$W0 5983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h2 $EFGH,$abcd,$W0 5993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $W1,$W1,@MSG[3] 6013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $abcd,$ABCD 6023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h $ABCD,$EFGH,$W1 6033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sha256h2 $EFGH,$abcd,$W1 6043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $ABCD,$ABCD,$ABCD_SAVE 6063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.i32 $EFGH,$EFGH,$EFGH_SAVE 6073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .Loop_v8 6083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {$ABCD,$EFGH},[$ctx] 6103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ret @ bx lr 6123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 6133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#endif 6143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___ 
6153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root}}} 6163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 6173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" 618221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.align 2 6193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.comm OPENSSL_armcap_P,4,4 620656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project___ 621656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project 6223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root{ my %opcode = ( 6233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, 6243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); 6253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub unsha256 { 6273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root my ($mnemonic,$arg)=@_; 6283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { 6303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) 6313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root |(($2&7)<<17)|(($2&8)<<4) 6323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root |(($3&7)<<1) |(($3&8)<<2); 6333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root # since ARMv7 instructions are always encoded little-endian. 
6343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root # correct solution is to use .inst directive, but older 6353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root # assemblers don't implement it:-( 6363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", 6373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root $word&0xff,($word>>8)&0xff, 6383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ($word>>16)&0xff,($word>>24)&0xff, 6393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root $mnemonic,$arg; 6403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root } 6413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root } 6423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root} 6433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Rootforeach (split($/,$code)) { 6453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root s/\`([^\`]*)\`/eval $1/geo; 6473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; 6493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root s/\bret\b/bx lr/go or 6513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 6523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root print $_,"\n"; 6543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root} 6553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 656656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Projectclose STDOUT; # enforce flush 657