sha256-armv4.pl revision 1b249678059ecd918235790a7a0471771cc4e5ce
11b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin#! /usr/bin/env perl 21b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. 31b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# 41b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# Licensed under the OpenSSL license (the "License"). You may not use 51b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# this file except in compliance with the License. You can obtain a copy 61b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# in the file LICENSE in the source distribution or at 71b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# https://www.openssl.org/source/license.html 81b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin 9d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 10d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# ==================================================================== 11d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# project. The module is, however, dual licensed under OpenSSL and 13d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# CRYPTOGAMS licenses depending on where you obtain it. For further 14d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# details see http://www.openssl.org/~appro/cryptogams/. 15e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley# 16e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley# Permission to use under GPL terms is granted. 17d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# ==================================================================== 18d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 19d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# SHA256 block procedure for ARMv4. May 2007. 
20d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 21d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Performance is ~2x better than gcc 3.4 generated code and in "abso- 22d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per 23d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# byte [on single-issue Xscale PXA250 core]. 24d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 25d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# July 2010. 26d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# 27d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Rescheduling for dual-issue pipeline resulted in 22% improvement on 28d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Cortex A8 core and ~20 cycles per processed byte. 29d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 30d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# February 2011. 31d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# 32d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Profiler-assisted and platform-specific optimization resulted in 16% 33d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# improvement on Cortex A8 core and ~15.4 cycles per processed byte. 34d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 35d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# September 2013. 36d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# 37d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Add NEON implementation. On Cortex A8 it was measured to process one 38d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon 39d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only 40d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# code (meaning that latter performs sub-optimally, nothing was done 41d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# about it). 
# May 2014.
#
# Add ARMv8 code path performing at 2.0 cpb on Apple A7.

# Command line: [flavour] [...] [output-file]
#
# The first argument is taken as the perlasm flavour unless it already looks
# like a file name (something with an extension), in which case it is the
# output file and no flavour is in effect.  Otherwise the remaining arguments
# are consumed until one looks like a file name.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # Directory part of $0 (with trailing separator).  Use an empty prefix
    # when $0 has no directory component rather than reading $1 after a
    # failed match.
    $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

    # Look for the translator next to this script first, then in the shared
    # perlasm directory.
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Everything printed from here on is piped through the translator.
    # The script and output paths are quoted so that paths containing
    # spaces survive the shell; a failed spawn is fatal instead of silently
    # producing no output.
    open STDOUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
        or die "can't call $xlate: $!";
} else {
    # No translation: write straight to the output file.  When no output
    # file was given, keep the inherited STDOUT instead of attempting (and
    # failing) to reopen it on an empty name.
    if ($output) {
        open(STDOUT, ">", $output) or die "can't open $output: $!";
    }
}

# Register aliases used by the code templates below.  The scratch names
# $t0/$t4/$t1/$t3 deliberately alias the registers that carry $ctx/$inp/$len
# and $T1.
$ctx="r0";	$t0="r0";
$inp="r1";	$t4="r1";
$len="r2";	$t1="r2";
$T1="r3";	$t3="r3";
$A="r4";
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
# Working variables a..h in round order; the list is rotated by one position
# after every generated round (see the round loops below).
@V=($A,$B,$C,$D,$E,$F,$G,$H);
$t2="r12";
$Ktbl="r14";

# Shift/rotate amounts interpolated into the templates below.  The third
# entry of @sigma0/@sigma1 is used as a logical shift right (lsr) in
# BODY_16_XX; all other entries are used as rotate amounts.
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);

# BODY_00_15($i, $a..$h)
#
# Appends the assembly for round $i to $code, with $a..$h naming the
# registers that hold the eight working variables for this round.
#
# The first template is emitted only for $i<16 and contains the input-word
# load/assembly for that round.  The second template is emitted for every
# round; BODY_16_XX reuses it for rounds 16 and up.  "#if $i==.." lines are
# written to the output with $i already substituted.
#
# As a side effect the global register names $t2 and $t3 are exchanged, so
# that the next generated round sees the roles of the two registers swapped
# (per the template comments: "a^b, b^c in next round").
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___ if ($i<16);
#if __ARM_ARCH__>=7
	@ ldr	$t1,[$inp],#4			@ $i
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
# ifndef __ARMEB__
	rev	$t1,$t1
# endif
#else
	@ ldrb	$t1,[$inp,#3]			@ $i
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	ldrb	$t2,[$inp,#2]
	ldrb	$t0,[$inp,#1]
	orr	$t1,$t1,$t2,lsl#8
	ldrb	$t2,[$inp],#4
	orr	$t1,$t1,$t0,lsl#16
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	orr	$t1,$t1,$t2,lsl#24
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
#endif
___
$code.=<<___;
	ldr	$t2,[$Ktbl],#4			@ *K256++
	add	$h,$h,$t1			@ h+=X[i]
	str	$t1,[sp,#`$i%16`*4]
	eor	$t1,$f,$g
	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
	and	$t1,$t1,$e
	add	$h,$h,$t2			@ h+=K256[i]
	eor	$t1,$t1,$g			@ Ch(e,f,g)
	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
	add	$h,$h,$t1			@ h+=Ch(e,f,g)
#if $i==31
	and	$t2,$t2,#0xff
	cmp	$t2,#0xf2			@ done?
#endif
#if $i<15
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4			@ prefetch
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t2,$a,$b			@ a^b, b^c in next round
#else
	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
	eor	$t2,$a,$b			@ a^b, b^c in next round
	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
#endif
	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
	add	$d,$d,$h			@ d+=h
	eor	$t3,$t3,$b			@ Maj(a,b,c)
	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
___
	($t2,$t3)=($t3,$t2);
}

# BODY_16_XX($i, $a..$h)
#
# Appends the message-schedule update for round $i (sigma0/sigma1 of earlier
# words combined into X[i], leaving the result in $t1 and the start of
# Sigma1(e) in $t0), then delegates to BODY_00_15 with the same arguments for
# the round proper.  Because $i>=16 here, BODY_00_15 emits only its second
# template.
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___;
	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
	@ ldr	$t4,[sp,#`($i+14)%16`*4]
	mov	$t0,$t1,ror#$sigma0[0]
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	mov	$t2,$t4,ror#$sigma1[0]
	eor	$t0,$t0,$t1,ror#$sigma0[1]
	eor	$t2,$t2,$t4,ror#$sigma1[1]
	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
	ldr	$t1,[sp,#`($i+0)%16`*4]
	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
	ldr	$t4,[sp,#`($i+9)%16`*4]

	add	$t2,$t2,$t0
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
	add	$t1,$t1,$t2
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
	add	$t1,$t1,$t4			@ X[i]
___
	&BODY_00_15(@_);
}

# Integer-only code path: file prologue, the K256 constant table, run-time
# dispatch to the .LARMv8/.LNEON entry points, and the function prologue up
# to the first input-word load.  Note that this assignment (re)initialises
# $code; everything else appends to it.
$code=<<___;
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code   32
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
	sub	$Ktbl,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t3,$B,$C		@ magic
	eor	$t2,$t2,$t2
___
# Rounds 0..15 are generated once; rounds 16..31 are generated once after the
# .Lrounds_16_xx label, which the emitted code branches back to until the
# "done?" comparison generated for round 31 succeeds.  @V is rotated after
# every round so that each round receives the working variables in their
# new positions.
for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".Lrounds_16_xx:\n";
for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
# Loop tail: add the working variables back into the context, advance to the
# next input block or tear down the frame and return.
$code.=<<___;
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	$t3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
	ldr	$t0,[$t3,#0]
	ldr	$t1,[$t3,#4]
	ldr	$t2,[$t3,#8]
	add	$A,$A,$t0
	ldr	$t0,[$t3,#12]
	add	$B,$B,$t1
	ldr	$t1,[$t3,#16]
	add	$C,$C,$t2
	ldr	$t2,[$t3,#20]
	add	$D,$D,$t0
	ldr	$t0,[$t3,#24]
	add	$E,$E,$t1
	ldr	$t1,[$t3,#28]
	add	$F,$F,$t2
	ldr	$inp,[sp,#17*4]		@ pull inp
	ldr	$t2,[sp,#18*4]		@ pull inp+len
	add	$G,$G,$t0
	add	$H,$H,$t1
	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
	cmp	$inp,$t2
	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#`16+3`*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
___
######################################################################
# NEON stuff
#
{{{
my @X=map("q$_",(0..3));
my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
my $Xfer=$t4;
my $j=0;

# Dlo("qN") / Dhi("qN") return the names of the low ("d2N") and high
# ("d2N+1") halves of a q register, or "" when the argument does not contain
# a q-register name.
sub Dlo   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
sub Dhi   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }

# Any call to an undefined sub lands here and is turned into one line of
# assembly appended to $code: the sub name becomes the mnemonic (its first
# "_" replaced by "."), the arguments become the comma-separated operands,
# and a purely numeric last argument gets a "#" immediate prefix.
sub AUTOLOAD		# thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
  my $arg = pop;
    $arg = "#$arg" if ($arg*1 eq $arg);
    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}

# Xupdate($body)
#
# Emits the NEON message-schedule update for the four words held in $X[0]
# while interleaving it with the snippets returned by four invocations of
# $body (a code ref returning a list of strings that are eval'ed here one at
# a time).  The updated words are added to the next four table entries
# loaded through $Ktbl and stored through $Xfer; finally @X is rotated so
# that the next call works on the following register.
sub Xupdate
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  # NOTE(review): not referenced directly in this sub; presumably the
  # eval'ed snippets from $body assign to and read these lexicals -- confirm
  # against the $body implementation before removing.
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T2,$T0,$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T1,$T0,$sigma0[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T2,$T0,32-$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T3,$T0,$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T2);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T3,$T0,32-$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);		# sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	  &veor		($T5,$T5,$T4);		# sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 # Drain all but the last two snippets before the store.
	 while($#insns>=2) { eval(shift(@insns)); }
	&vst1_32	("{$T0}","[$Xfer,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));

	push(@X,shift(@X));		# "rotate" X[]
}
# Xpreload(\&body)
#
# Emits one quarter of the "preload" phase for a freshly loaded input block:
# the vector in X[0] is byte-swapped, added to the constants fetched through
# $Ktbl and the sum is stored through $Xfer, after which @X is rotated so the
# next call handles the following vector.
#
# $body is a code reference returning a list of Perl snippets (strings) that
# each emit scalar instructions when eval'd.  It is invoked four times and the
# resulting snippets are eval'd in between the NEON instructions, so the two
# instruction streams come out interleaved.  The lexicals $a..$h declared here
# are the variables those snippets assign from @V.
#
# NOTE(review): &vld1_32, &vrev32_8, &vadd_i32 and &vst1_32 are not defined in
# this chunk; presumably they are resolved by a thunk defined earlier in the
# file - confirm there.
sub Xpreload()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vrev32_8	(@X[0],@X[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 foreach (@insns) { eval; }	# remaining instructions
	&vst1_32	("{$T0}","[$Xfer,:128]!");

	push(@X,shift(@X));		# "rotate" X[]
}

# body_00_15()
#
# Returns the scalar code for one round as a list of Perl snippets.  The
# snippets are single-quoted on purpose: nothing is interpolated here, all
# variables ($a..$h, $t0..$t3, $j, @V, ...) are looked up when the caller
# eval's the string.  Snippets joined with "." form a single list element and
# are therefore always issued together by the caller.
#
# The last element advances the round counter $j, rotates @V by one position
# and swaps $t2/$t3 in preparation for the next round.
sub body_00_15 () {
	(
	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
	'&add ($h,$h,$t1)',			# h+=X[i]+K[i]
	'&eor ($t1,$f,$g)',
	'&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
	'&add ($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
	'&and ($t1,$t1,$e)',
	'&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
	'&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
	'&eor ($t1,$t1,$g)',			# Ch(e,f,g)
	'&add ($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
	'&eor ($t2,$a,$b)',			# a^b, b^c in next round
	'&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
	'&add ($h,$h,$t1)',			# h+=Ch(e,f,g)
	# Fetch the next X[i]+K[i] word from the stack; at $j==15 load through
	# $Ktbl instead, and at $j==31 load the word saved at [sp,#64].
	'&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
	'&ldr ($t1,"[$Ktbl]") if ($j==15);'.
	'&ldr ($t1,"[sp,#64]") if ($j==31)',
	'&and ($t3,$t3,$t2)',			# (b^c)&=(a^b)
	'&add ($d,$d,$h)',			# d+=h
	'&add ($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a)
	'&eor ($t3,$t3,$b)',			# Maj(a,b,c)
	'$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
	)
}

# NEON entry point.  The prologue below carves an aligned scratch frame out of
# the stack with the following layout (offsets relative to the new sp):
#   [sp,#0..63]  X[i]+K[i] transfer area written through $Xfer
#   [sp,#64]     $ctx
#   [sp,#68]     $inp
#   [sp,#72]     $len (already converted to an end-of-input pointer)
#   [sp,#76]     original sp
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.global	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4-r12,lr}

	sub	$H,sp,#16*4+16
	adr	$Ktbl,K256
	bic	$H,$H,#15		@ align for 128-bit stores
	mov	$t2,sp
	mov	sp,$H			@ alloca
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp

	vld1.8		{@X[0]},[$inp]!
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	vld1.32		{$T0},[$Ktbl,:128]!
	vld1.32		{$T1},[$Ktbl,:128]!
	vld1.32		{$T2},[$Ktbl,:128]!
	vld1.32		{$T3},[$Ktbl,:128]!
	vrev32.8	@X[0],@X[0]		@ yes, even on
	str		$ctx,[sp,#64]
	vrev32.8	@X[1],@X[1]		@ big-endian
	str		$inp,[sp,#68]
	mov		$Xfer,sp
	vrev32.8	@X[2],@X[2]
	str		$len,[sp,#72]
	vrev32.8	@X[3],@X[3]
	str		$t2,[sp,#76]		@ save original sp
	vadd.i32	$T0,$T0,@X[0]
	vadd.i32	$T1,$T1,@X[1]
	vst1.32		{$T0},[$Xfer,:128]!
	vadd.i32	$T2,$T2,@X[2]
	vst1.32		{$T1},[$Xfer,:128]!
	vadd.i32	$T3,$T3,@X[3]
	vst1.32		{$T2},[$Xfer,:128]!
	vst1.32		{$T3},[$Xfer,:128]!

	ldmia		$ctx,{$A-$H}
	sub		$Xfer,$Xfer,#64
	ldr		$t1,[sp,#0]
	eor		$t2,$t2,$t2
	eor		$t3,$B,$C
	b		.L_00_48

.align	4
.L_00_48:
___
	# Body of the .L_00_48 loop: four Xupdate steps, one per vector in @X.
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
$code.=<<___;
	teq	$t1,#0				@ check for K256 terminator
	ldr	$t1,[sp,#0]
	sub	$Xfer,$Xfer,#64
	bne	.L_00_48

	ldr		$inp,[sp,#68]
	ldr		$t0,[sp,#72]
	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl
	teq		$inp,$t0
	it		eq
	subeq		$inp,$inp,#64		@ avoid SEGV
	vld1.8		{@X[0]},[$inp]!		@ load next input block
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	it		ne
	strne		$inp,[sp,#68]
	mov		$Xfer,sp
___
	# Four Xpreload steps, one per vector loaded just above, interleaved
	# with the scalar snippets returned by body_00_15.
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
# Epilogue: add the working variables into the eight words at [$t1] and
# either branch back to .L_00_48 (flags still "ne") or restore sp and return.
$code.=<<___;
	ldr	$t0,[$t1,#0]
	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past
	ldr	$t2,[$t1,#4]
	ldr	$t3,[$t1,#8]
	ldr	$t4,[$t1,#12]
	add	$A,$A,$t0			@ accumulate
	ldr	$t0,[$t1,#16]
	add	$B,$B,$t2
	ldr	$t2,[$t1,#20]
	add	$C,$C,$t3
	ldr	$t3,[$t1,#24]
	add	$D,$D,$t4
	ldr	$t4,[$t1,#28]
	add	$E,$E,$t0
	str	$A,[$t1],#4
	add	$F,$F,$t2
	str	$B,[$t1],#4
	add	$G,$G,$t3
	str	$C,[$t1],#4
	add	$H,$H,$t4
	str	$D,[$t1],#4
	stmia	$t1,{$E-$H}

	ittte	ne
	movne	$Xfer,sp
	ldrne	$t1,[sp,#0]
	eorne	$t2,$t2,$t2
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	$t3,$B,$C
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
___
}}}
######################################################################
# ARMv8 stuff
#
{{{
# Register allocation for the ARMv8 Crypto Extension code path.
my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
my @MSG=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3";

# The sha256* mnemonics below are rewritten into INST(...) byte sequences by
# unsha256() during post-processing; in Thumb-2 builds INST() reorders the
# bytes and ORs 0xc into the top byte.
#
# NOTE(review): .LARMv8 subtracts 256+32 from $Ktbl (r3) on entry, so it
# assumes the caller leaves r3 pointing that far past the constant table; the
# code establishing this is outside the visible chunk - confirm there.
$code.=<<___;
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{$ABCD,$EFGH},[$ctx]
	sub	$Ktbl,$Ktbl,#256+32
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	4
.Loop_v8:
	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
	vld1.32		{$W0},[$Ktbl]!
	vrev32.8	@MSG[0],@MSG[0]
	vrev32.8	@MSG[1],@MSG[1]
	vrev32.8	@MSG[2],@MSG[2]
	vrev32.8	@MSG[3],@MSG[3]
	vmov		$ABCD_SAVE,$ABCD	@ offload
	vmov		$EFGH_SAVE,$EFGH
	teq		$inp,$len
___
# Twelve round groups that also update the message schedule
# (sha256su0/sha256su1).  $W0/$W1 alternate and @MSG rotates after each group.
for($i=0;$i<12;$i++) {
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	sha256su0	@MSG[0],@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0
	sha256su1	@MSG[0],@MSG[2],@MSG[3]
___
	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
}
# Final four round groups without schedule updates.  The last constant load
# has no writeback, so $Ktbl has advanced by 15*16 = 256-16 bytes and is
# rewound by exactly that amount.  The closing bne consumes the flags set by
# the teq at the top of the loop.
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vld1.32		{$W0},[$Ktbl]!
	vadd.i32	$W1,$W1,@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vld1.32		{$W1},[$Ktbl]
	vadd.i32	$W0,$W0,@MSG[2]
	sub		$Ktbl,$Ktbl,#256-16	@ rewind
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vadd.i32	$W1,$W1,@MSG[3]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE
	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE
	it		ne
	bne		.Loop_v8

	vst1.32		{$ABCD,$EFGH},[$ctx]

	ret		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
}}}
$code.=<<___;
.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm   OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
___

# Copy this script's leading comment block (license/credits) into the output,
# turning '#' comment markers into '@'.  The shebang line is skipped and
# copying stops at the first line that is neither a comment nor empty.
# Three-argument open with a lexical handle keeps an unusual script path from
# being interpreted as an open mode; a failure only costs the header, so it is
# reported but not fatal.
if (open(my $self,'<',$0)) {
	while(<$self>) {
		next if (/^#!/);
		last if (!s/^#/@/ and !/^$/);
		print;
	}
	close $self;
} else {
	warn "$0: cannot re-read script to copy the license header: $!\n";
}

{   my %opcode = (
	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);

    # unsha256($mnemonic,$arg)
    #
    # Hand-assembles one SHA-256 Crypto Extension instruction.
    #   $mnemonic  one of the keys of %opcode
    #   $arg       operand text, "qD,qN,qM" or "qD,qM"
    # Returns "INST(b0,b1,b2,b3)\t@ <mnemonic> <operands>" with the four bytes
    # of the encoded word listed least-significant first.
    # Dies on an unknown mnemonic or unparseable operands: silently emitting a
    # wrong word, or dropping the instruction, would produce an object file
    # that assembles cleanly but computes garbage.
    sub unsha256 {
	my ($mnemonic,$arg)=@_;

	exists $opcode{$mnemonic}
	    or die "unsha256: unknown mnemonic '$mnemonic'\n";
	$arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/
	    or die "unsha256: cannot parse operands '$arg' of $mnemonic\n";

	# The middle operand is absent in the two-operand form and then
	# contributes no bits.
	my ($qd,$qn,$qm) = ($1, defined($2) ? $2 : 0, $3);

	my $word = $opcode{$mnemonic}|(($qd&7)<<13)|(($qd&8)<<19)
				     |(($qn&7)<<17)|(($qn&8)<<4)
				     |(($qm&7)<<1) |(($qm&8)<<2);
	# since ARMv7 instructions are always encoded little-endian.
	# correct solution is to use .inst directive, but older
	# assemblers don't implement it:-(
	return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
    }
}

# Post-process the accumulated assembly line by line and print it.
foreach (split($/,$code)) {

	# evaluate Perl expressions embedded between backticks
	s/\`([^\`]*)\`/eval $1/geo;

	# hand-assemble the SHA-256 instructions
	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;

	# "ret" becomes "bx lr"; a "bx lr" that was already in the text is
	# emitted as a raw word instead ("or" keeps a freshly rewritten ret
	# from being converted as well)
	s/\bret\b/bx lr/go		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

# enforce flush, and fail loudly if the output could not be written completely
close STDOUT or die "error closing STDOUT: $!";