11b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin#! /usr/bin/env perl
21b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
31b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin#
41b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# Licensed under the OpenSSL license (the "License").  You may not use
51b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# this file except in compliance with the License.  You can obtain a copy
61b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# in the file LICENSE in the source distribution or at
71b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin# https://www.openssl.org/source/license.html
81b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin
9d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
10d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# ====================================================================
11d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# project. The module is, however, dual licensed under OpenSSL and
13d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# CRYPTOGAMS licenses depending on where you obtain it. For further
14d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# details see http://www.openssl.org/~appro/cryptogams/.
15e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#
16e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley# Permission to use under GPL terms is granted.
17d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# ====================================================================
18d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
19d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# SHA256 block procedure for ARMv4. May 2007.
20d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
21d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Performance is ~2x better than gcc 3.4 generated code and in "abso-
22d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
23d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# byte [on single-issue Xscale PXA250 core].
24d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
25d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# July 2010.
26d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#
27d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Rescheduling for dual-issue pipeline resulted in 22% improvement on
28d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Cortex A8 core and ~20 cycles per processed byte.
29d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
30d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# February 2011.
31d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#
32d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Profiler-assisted and platform-specific optimization resulted in 16%
33d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
34d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
35d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# September 2013.
36d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#
37d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Add NEON implementation. On Cortex A8 it was measured to process one
38d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
39d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
40d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# code (meaning that latter performs sub-optimally, nothing was done
41d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# about it).
42d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
43d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# May 2014.
44d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#
45d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
46d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Command-line handling. The first argument is taken as the perlasm
# "flavour" unless it looks like a file name (word characters followed by
# ".ext"), in which case it is the output file and there is no flavour.
# Otherwise further arguments are shifted until one looks like a file
# name or the argument list runs out.
47e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley$flavour = shift;
48c895d6b1c580258e72e1ed3fcc86d38970ded9e1David Benjaminif ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
49c895d6b1c580258e72e1ed3fcc86d38970ded9e1David Benjaminelse { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
50e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley
# With a flavour other than "void", STDOUT is re-opened as a pipe into
# arm-xlate.pl (looked for next to this script, then under
# ../../perlasm/ relative to it) run with the current perl ($^X).
# Otherwise STDOUT is re-opened directly onto $output.
# NOTE(review): neither open() result is checked.
51e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langleyif ($flavour && $flavour ne "void") {
52e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
53e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
54e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
55e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley    die "can't locate arm-xlate.pl";
56e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley
57e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley    open STDOUT,"| \"$^X\" $xlate $flavour $output";
58e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley} else {
59e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley    open STDOUT,">$output";
60e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley}
61d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Register names used by the integer-only code path. r0-r3 each carry two
# Perl names: $ctx/$inp/$len/$T1 and the scratch names $t0/$t4/$t1/$t3
# refer to the same registers. @V lists the registers for the eight
# working variables a..h (r4-r11). $t2 is r12 and $Ktbl (r14) is the
# pointer that walks the K256 table.
62d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$ctx="r0";	$t0="r0";
63d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$inp="r1";	$t4="r1";
64d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$len="r2";	$t1="r2";
65d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$T1="r3";	$t3="r3";
66d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$A="r4";
67d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$B="r5";
68d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$C="r6";
69d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$D="r7";
70d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$E="r8";
71d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$F="r9";
72d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$G="r10";
73d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$H="r11";
74d9e397b599b13d642138480a28c14db7a136bf0Adam Langley@V=($A,$B,$C,$D,$E,$F,$G,$H);
75d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$t2="r12";
76d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$Ktbl="r14";
77d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Rotate/shift amounts for the four SHA-256 functions. In the round code
# all three @Sigma0/@Sigma1 entries are used as rotates, whereas the third
# entry of @sigma0/@sigma1 is used as a logical right shift (lsr).
78d9e397b599b13d642138480a28c14db7a136bf0Adam Langley@Sigma0=( 2,13,22);
79d9e397b599b13d642138480a28c14db7a136bf0Adam Langley@Sigma1=( 6,11,25);
80d9e397b599b13d642138480a28c14db7a136bf0Adam Langley@sigma0=( 7,18, 3);
81d9e397b599b13d642138480a28c14db7a136bf0Adam Langley@sigma1=(17,19,10);
82d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# BODY_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for round $i to $code. $a..$h are the names of the
# registers currently holding the eight working variables.
#
# For $i<16 a prologue is emitted first: it completes the input word in
# $t1 (a byte swap via "rev" on __ARM_ARCH__>=7 unless __ARMEB__, or
# byte-by-byte assembly with ldrb/orr otherwise) and starts Sigma1(e) in
# $t0. The initial load of that word is only shown as an "@" assembly
# comment here because it is issued earlier (before .Loop for round 0,
# and as the "prefetch" of the preceding round afterwards).
#
# The Maj(a,b,c) addition is deferred by one round: each round first adds
# the value left in $t2 by the previous round ("from the past") and
# leaves its own Maj() in $t3, after which the Perl-level names $t2 and
# $t3 are swapped.
#
# Lines such as "#if $i==15" have $i interpolated by Perl and are
# presumably resolved by the C preprocessor when the output is assembled.
83d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysub BODY_00_15 {
84d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
85d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
86d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code.=<<___ if ($i<16);
87d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#if __ARM_ARCH__>=7
88d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	@ ldr	$t1,[$inp],#4			@ $i
89d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# if $i==15
90d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	str	$inp,[sp,#17*4]			@ make room for $t4
91d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# endif
92d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
93d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
94d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
95e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley# ifndef __ARMEB__
96d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	rev	$t1,$t1
97e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley# endif
98d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#else
99d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	@ ldrb	$t1,[$inp,#3]			@ $i
100d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
101d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldrb	$t2,[$inp,#2]
102d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldrb	$t0,[$inp,#1]
103d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	orr	$t1,$t1,$t2,lsl#8
104d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldrb	$t2,[$inp],#4
105d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	orr	$t1,$t1,$t0,lsl#16
106d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# if $i==15
107d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	str	$inp,[sp,#17*4]			@ make room for $t4
108d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# endif
109d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
110d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	orr	$t1,$t1,$t2,lsl#24
111d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
112d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
113d9e397b599b13d642138480a28c14db7a136bf0Adam Langley___
# Common part of every round: store X[i] in the 16-word stack buffer,
# accumulate X[i], Sigma1(e), K256[i] and Ch(e,f,g) into h, add h to d,
# then add Sigma0(a). For $i==31 the low byte of the K256 word just
# loaded is compared with 0xf2 to set the flags for the loop-exit test.
114d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code.=<<___;
115d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t2,[$Ktbl],#4			@ *K256++
116d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$h,$h,$t1			@ h+=X[i]
117d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	str	$t1,[sp,#`$i%16`*4]
118d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t1,$f,$g
119d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
120d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	and	$t1,$t1,$e
121d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$h,$h,$t2			@ h+=K256[i]
122d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t1,$t1,$g			@ Ch(e,f,g)
123d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
124d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$h,$h,$t1			@ h+=Ch(e,f,g)
125d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#if $i==31
126d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	and	$t2,$t2,#0xff
127d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	cmp	$t2,#0xf2			@ done?
128d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
129d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#if $i<15
130d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# if __ARM_ARCH__>=7
131d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t1,[$inp],#4			@ prefetch
132d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# else
133d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldrb	$t1,[$inp,#3]
134d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# endif
135d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t2,$a,$b			@ a^b, b^c in next round
136d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#else
137d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
138d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t2,$a,$b			@ a^b, b^c in next round
139d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
140d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
141d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
142d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
143d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$d,$d,$h			@ d+=h
144d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t3,$t3,$b			@ Maj(a,b,c)
145d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
146d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
147d9e397b599b13d642138480a28c14db7a136bf0Adam Langley___
	# Swap the Perl-side names (globals, so the change persists across
	# calls): the next round will see this round's Maj() as $t2 and this
	# round's a^b, which is its own b^c, as $t3.
148d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	($t2,$t3)=($t3,$t2);
149d9e397b599b13d642138480a28c14db7a136bf0Adam Langley}
150d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# BODY_16_XX($i,$a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the message-schedule step for round $i to $code and then emits
# the round itself via BODY_00_15. The schedule step computes sigma0 of
# the word already held in $t1 and sigma1 of the word already held in $t4
# (both loaded by the previous round, see the "@ ldr" reminders), adds
# two more words read from the stack buffer at slots ($i+0)%16 and
# ($i+9)%16, and leaves the new X[i] in $t1. It also starts Sigma1(e) in
# $t0, the job BODY_00_15's prologue does for rounds below 16.
151d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysub BODY_16_XX {
152d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
153d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
154d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code.=<<___;
155d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
156d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	@ ldr	$t4,[sp,#`($i+14)%16`*4]
157d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	mov	$t0,$t1,ror#$sigma0[0]
158d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
159d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	mov	$t2,$t4,ror#$sigma1[0]
160d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$t0,$t1,ror#$sigma0[1]
161d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t2,$t2,$t4,ror#$sigma1[1]
162d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
163d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t1,[sp,#`($i+0)%16`*4]
164d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
165d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t4,[sp,#`($i+9)%16`*4]
166d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
167d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$t2,$t2,$t0
168d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
169d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$t1,$t1,$t2
170d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
171d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$t1,$t1,$t4			@ X[i]
172d9e397b599b13d642138480a28c14db7a136bf0Adam Langley___
	# Pass the arguments through unchanged. BODY_00_15 only emits its
	# input-loading prologue when $i<16, so for the 16..31 calls made by
	# the round loop only its common round body is appended.
173d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&BODY_00_15(@_);
174d9e397b599b13d642138480a28c14db7a136bf0Adam Langley}
175d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Start of the generated output ($code is assigned here, appended to
# everywhere else): preprocessor setup, ARM/Thumb-2 mode selection, the
# 64-word K256 constant table followed by a zero terminator, and the
# entry of sha256_block_data_order up to the top of the per-block loop
# (.Loop). When __ARM_MAX_ARCH__>=7 and __KERNEL__ is not defined, the
# entry code loads OPENSSL_armcap_P and branches to .LARMv8 or .LNEON if
# the corresponding capability bit is set. Otherwise it turns len into an
# end pointer, saves ctx/inp/end plus r4-r11 and lr, loads the eight state
# words, points $Ktbl at K256 and reserves 16 words of stack for X[].
176d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code=<<___;
177e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#ifndef __KERNEL__
178b8494591d1b1a143f3b192d845c238bbf3bc629dKenny Root# include <openssl/arm_arch.h>
179e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#else
180e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley# define __ARM_ARCH__ __LINUX_ARM_ARCH__
181e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley# define __ARM_MAX_ARCH__ 7
182e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#endif
183d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
184d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.text
1851b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin#if defined(__thumb2__)
186e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.syntax unified
187e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.thumb
1881b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin#else
189e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.code   32
190e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#endif
191d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
192d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.type	K256,%object
193d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align	5
194d9e397b599b13d642138480a28c14db7a136bf0Adam LangleyK256:
195d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
196d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
197d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
198d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
199d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
200d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
201d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
202d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
203d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
204d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
205d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
206d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
207d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
208d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
209d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
210d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
211d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.size	K256,.-K256
212d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.word	0				@ terminator
213e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
214d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LOPENSSL_armcap:
215e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.word	OPENSSL_armcap_P-.Lsha256_block_data_order
216d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
217d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align	5
218d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
219d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.global	sha256_block_data_order
220d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.type	sha256_block_data_order,%function
221d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysha256_block_data_order:
222e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.Lsha256_block_data_order:
2231b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin#if __ARM_ARCH__<7 && !defined(__thumb2__)
224d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	r3,pc,#8		@ sha256_block_data_order
225e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#else
2261b249678059ecd918235790a7a0471771cc4e5ceDavid Benjamin	adr	r3,.Lsha256_block_data_order
227e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#endif
228e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
229d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	r12,.LOPENSSL_armcap
230d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
231e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#ifdef	__APPLE__
232e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldr	r12,[r12]
233e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#endif
234d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	tst	r12,#ARMV8_SHA256
235d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LARMv8
236d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	tst	r12,#ARMV7_NEON
237d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON
238d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
239e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
240d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
241d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
242d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	$Ktbl,r3,#256+32	@ K256
243d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	sp,sp,#16*4		@ alloca(X[16])
244d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.Loop:
245d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# if __ARM_ARCH__>=7
246d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t1,[$inp],#4
247d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# else
248d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldrb	$t1,[$inp,#3]
249d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# endif
250d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t3,$B,$C		@ magic
251d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	eor	$t2,$t2,$t2
252d9e397b599b13d642138480a28c14db7a136bf0Adam Langley___
# Emit rounds 0..15 straight-line. After each round @V is rotated right
# by one (last element moved to the front), so the register that served
# as h becomes the next round's a without any data movement.
253d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyfor($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
# Rounds 16..31 are emitted once, after the .Lrounds_16_xx label; $i
# deliberately carries over from the loop above.
254d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code.=".Lrounds_16_xx:\n";
255d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyfor (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
# Tail of the integer-only path. The condition tested first is the one
# set by the "cmp ...,#0xf2" emitted in round 31: when not equal, control
# branches back to .Lrounds_16_xx. Otherwise the saved ctx pointer is
# reloaded, the working variables are added to the eight context words
# and stored back, $Ktbl is rewound by 256 bytes, and the code loops to
# .Loop until inp equals the saved end pointer. The frame (16 words of
# X[] plus the three saved words) is then dropped and r4-r11 restored,
# returning via pc on __ARM_ARCH__>=5 or via lr otherwise.
256d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code.=<<___;
257e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#if __ARM_ARCH__>=7
258e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ite	eq			@ Thumb2 thing, sanity check in ARM
259e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#endif
260d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldreq	$t3,[sp,#16*4]		@ pull ctx
261d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.Lrounds_16_xx
262d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
263d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
264d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t0,[$t3,#0]
265d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t1,[$t3,#4]
266d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t2,[$t3,#8]
267d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$A,$A,$t0
268d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t0,[$t3,#12]
269d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$B,$B,$t1
270d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t1,[$t3,#16]
271d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$C,$C,$t2
272d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t2,[$t3,#20]
273d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$D,$D,$t0
274d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t0,[$t3,#24]
275d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$E,$E,$t1
276d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t1,[$t3,#28]
277d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$F,$F,$t2
278d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$inp,[sp,#17*4]		@ pull inp
279d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	$t2,[sp,#18*4]		@ pull inp+len
280d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$G,$G,$t0
281d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	$H,$H,$t1
282d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
283d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	cmp	$inp,$t2
284d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
285d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.Loop
286d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
287d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	sp,sp,#`16+3`*4	@ destroy frame
288d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#if __ARM_ARCH__>=5
289d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldmia	sp!,{r4-r11,pc}
290d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#else
291d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldmia	sp!,{r4-r11,lr}
292d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	tst	lr,#1
293d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	moveq	pc,lr			@ be binary compatible with V4, yet
294d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bx	lr			@ interoperable with Thumb ISA:-)
295d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
296d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.size	sha256_block_data_order,.-sha256_block_data_order
297d9e397b599b13d642138480a28c14db7a136bf0Adam Langley___
298d9e397b599b13d642138480a28c14db7a136bf0Adam Langley######################################################################
299d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# NEON stuff
300d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#
301d9e397b599b13d642138480a28c14db7a136bf0Adam Langley{{{
# Lexical state for the NEON code path, scoped to the enclosing braces:
# @X names the quad registers q0..q3, $T0..$T3 are q8..q11, $T4/$T5 are
# the double registers d24/d25, $Xfer takes the current value of $t4 and
# $j starts at 0.
302d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy @X=map("q$_",(0..3));
303d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
304d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy $Xfer=$t4;
305d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy $j=0;
306d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Dlo("qN") returns "d<2N>", the name of the low double-word half of quad
# register qN; an argument that does not contain "q" followed by one or
# two digits (the first being 1) yields the empty string.
307d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
# Dhi("qN") returns "d<2N+1>", the name of the high double-word half of
# quad register qN; an argument that does not contain "q" followed by one
# or two digits (the first being 1) yields the empty string.
308d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
309d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# AUTOLOAD turns a call to any otherwise undefined sub (e.g. &vext_8,
# &veor) into one line of assembly appended to $code. The sub name with
# its package prefix removed becomes the mnemonic, with the first "_"
# replaced by "." (vext_8 -> vext.8). The last argument is prefixed with
# "#" when it survives a numeric round trip unchanged, i.e. it is treated
# as an immediate; all operands are then joined with commas.
310d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysub AUTOLOAD()          # thunk [simplified] x86-style perlasm
311d9e397b599b13d642138480a28c14db7a136bf0Adam Langley{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
312d9e397b599b13d642138480a28c14db7a136bf0Adam Langley  my $arg = pop;
313d9e397b599b13d642138480a28c14db7a136bf0Adam Langley    $arg = "#$arg" if ($arg*1 eq $arg);
314d9e397b599b13d642138480a28c14db7a136bf0Adam Langley    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
315d9e397b599b13d642138480a28c14db7a136bf0Adam Langley}
316d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Xupdate($body)
#
# Emits the NEON message-schedule update for the four words held in
# @X[0], interleaved with other code. $body is a code ref that is called
# four times; the lists it returns are concatenated into @insns, a queue
# of Perl source strings. Between the NEON instructions (emitted through
# the AUTOLOAD thunk) two or three of those strings are eval'ed at a
# time, so whatever they append to $code ends up woven between the
# vector instructions. Presumably each string emits one scalar round
# instruction; $body itself is not visible here.
#
# The vector work is: X[0..3] += X[9..12] + sigma0(X[1..4]), then sigma1
# is applied in two halves (first to X[14..15], then to the freshly
# computed X[16..17]). Rotates are built from a vshr.u32/vsli.32 pair.
# Finally the next four K256 words are loaded from $Ktbl, added to the new
# X[0..3] and stored through $Xfer, and @X is rotated left by one.
317d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysub Xupdate()
318d9e397b599b13d642138480a28c14db7a136bf0Adam Langley{ use integer;
319d9e397b599b13d642138480a28c14db7a136bf0Adam Langley  my $body = shift;
320d9e397b599b13d642138480a28c14db7a136bf0Adam Langley  my @insns = (&$body,&$body,&$body,&$body);
  # NOTE(review): these lexicals are not used directly below; presumably
  # they exist for the eval'ed snippets to assign and read - confirm
  # against the $body implementations.
321d9e397b599b13d642138480a28c14db7a136bf0Adam Langley  my ($a,$b,$c,$d,$e,$f,$g,$h);
322d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
323d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
324d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
325d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
326d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
327d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
328d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
329d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
330d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
331d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vshr_u32	($T2,$T0,$sigma0[0]);
332d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
333d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
334d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
335d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
336d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
337d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vshr_u32	($T1,$T0,$sigma0[2]);
338d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
339d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
340d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vsli_32	($T2,$T0,32-$sigma0[0]);
341d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
342d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
343d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vshr_u32	($T3,$T0,$sigma0[1]);
344d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
345d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
346d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&veor		($T1,$T1,$T2);
347d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
348d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
349d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vsli_32	($T3,$T0,32-$sigma0[1]);
350d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
351d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
352d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
353d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
354d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
355d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
356d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
357d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
358d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
359d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
360d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
361d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
362d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
363d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
364d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
365d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
366d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
367d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &veor		($T5,$T5,$T4);
368d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
369d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
370d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
371d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
372d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
373d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
374d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
375d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
376d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &veor		($T5,$T5,$T4);		# sigma1(X[14..15])
377d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
378d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
379d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
380d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
381d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
382d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
383d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
384d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
385d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
386d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
387d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
388d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
389d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
390d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
391d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &veor		($T5,$T5,$T4);
392d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
393d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
394d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
395d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
396d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
397d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vld1_32	("{$T0}","[$Ktbl,:128]!");
398d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
399d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
400d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
401d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
402d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
403d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	  &veor		($T5,$T5,$T4);		# sigma1(X[16..17])
404d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
405d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
406d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
407d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
408d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
409d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vadd_i32	($T0,$T0,@X[0]);
	 # Drain the queue until exactly two snippets remain; those two are
	 # run after the store below.
410d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 while($#insns>=2) { eval(shift(@insns)); }
411d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	&vst1_32	("{$T0}","[$Xfer,:128]!");
412d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
413d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	 eval(shift(@insns));
414d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
415d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	push(@X,shift(@X));		# "rotate" X[]
416d9e397b599b13d642138480a28c14db7a136bf0Adam Langley}
417d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Xpreload($body) - emit four scalar rounds interleaved with the NEON
# preparation of one freshly loaded input vector.
#
# $body is a code reference returning a list of Perl source strings, one
# per scheduling slot (see body_00_15).  It is called four times and the
# resulting strings are eval'ed in order, with the NEON work on @X[0]
# slotted in between:
#   - load a vector from [$Ktbl] into $T0 (address written back, "!"),
#   - byte-reverse each 32-bit word of @X[0],
#   - add @X[0] to $T0 and store the sum at [$Xfer] (written back, "!").
# Finally @X is rotated by one, so the next call works on the next entry.
#
# vld1_32/vrev32_8/vadd_i32/vst1_32, $T0, $Ktbl, $Xfer and @X are defined
# outside this sub.  NOTE(review): the helpers presumably append the named
# instruction to $code -- confirm against their definition.
sub Xpreload()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  # Lexical targets for the '($a,...,$h)=@V' assignment made inside the
  # eval'ed strings; the string evals below must stay in this scope.
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vrev32_8	(@X[0],@X[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 foreach (@insns) { eval; }	# remaining instructions
	&vst1_32	("{$T0}","[$Xfer,:128]!");

	push(@X,shift(@X));		# "rotate" X[]
}
444d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# body_00_15() - return one SHA-256 round as a list of 17 Perl source
# strings, one per scheduling slot.
#
# The strings are not executed here: the caller eval's them one at a time
# (see Xpreload) so that other instructions can be emitted in between.
# They must be eval'ed in a scope where $a..$h are declared, because the
# first slot copies the working variables out of @V.  The last slot
# advances the round counter $j, rotates @V by one position and swaps
# $t2/$t3.
#
# The load slot picks the next value for $t1 depending on $j:
#   ($j&15)!=15	from [sp,#4*(($j+1)&15)]
#   $j==15	from [$Ktbl]
#   $j==31	from [sp,#64]
sub body_00_15 () {
	(
	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
	'&add	($h,$h,$t1)',			# h+=X[i]+K[i]
	'&eor	($t1,$f,$g)',
	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
	'&and	($t1,$t1,$e)',
	'&eor	($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
	'&eor	($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
	'&eor	($t1,$t1,$g)',			# Ch(e,f,g)
	'&add	($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
	'&eor	($t2,$a,$b)',			# a^b, b^c in next round
	'&eor	($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g)
	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'.
	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'.
	'&ldr	($t1,"[sp,#64]")			if ($j==31)',
	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b)
	'&add	($d,$d,$h)',			# d+=h
	'&add	($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a)
	'&eor	($t3,$t3,$b)',			# Maj(a,b,c)
	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
	)
}
470d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# NEON code path, emitted only when __ARM_MAX_ARCH__>=7.
#
# Prologue: save r4-r12,lr, carve a 16-byte aligned scratch area of at
# least 16*4+16 bytes below sp, turn $len into an end-of-input pointer,
# load the first 64-byte block into @X[0..3] and byte-reverse it, add the
# first four vectors read from $Ktbl and store the sums in the scratch
# area.  $ctx, $inp, the end pointer and the original sp are parked at
# [sp,#64], [sp,#68], [sp,#72] and [sp,#76] respectively.
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.global	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4-r12,lr}

	sub	$H,sp,#16*4+16
	adr	$Ktbl,K256
	bic	$H,$H,#15		@ align for 128-bit stores
	mov	$t2,sp
	mov	sp,$H			@ alloca
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp

	vld1.8		{@X[0]},[$inp]!
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	vld1.32		{$T0},[$Ktbl,:128]!
	vld1.32		{$T1},[$Ktbl,:128]!
	vld1.32		{$T2},[$Ktbl,:128]!
	vld1.32		{$T3},[$Ktbl,:128]!
	vrev32.8	@X[0],@X[0]		@ yes, even on
	str		$ctx,[sp,#64]
	vrev32.8	@X[1],@X[1]		@ big-endian
	str		$inp,[sp,#68]
	mov		$Xfer,sp
	vrev32.8	@X[2],@X[2]
	str		$len,[sp,#72]
	vrev32.8	@X[3],@X[3]
	str		$t2,[sp,#76]		@ save original sp
	vadd.i32	$T0,$T0,@X[0]
	vadd.i32	$T1,$T1,@X[1]
	vst1.32		{$T0},[$Xfer,:128]!
	vadd.i32	$T2,$T2,@X[2]
	vst1.32		{$T1},[$Xfer,:128]!
	vadd.i32	$T3,$T3,@X[3]
	vst1.32		{$T2},[$Xfer,:128]!
	vst1.32		{$T3},[$Xfer,:128]!

	ldmia		$ctx,{$A-$H}
	sub		$Xfer,$Xfer,#64
	ldr		$t1,[sp,#0]
	eor		$t2,$t2,$t2
	eor		$t3,$B,$C
	b		.L_00_48

.align	4
.L_00_48:
___
	# Body of the .L_00_48 loop: four Xupdate passes, each of which
	# rotates @X by one entry, so together they cover @X[0..3].
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
# Loop back while the word fetched into $t1 is non-zero.  Once the K256
# terminator is seen, rewind $Ktbl and load the next input block; when
# $inp already equals the end pointer it is first stepped back by 64 so
# the loads stay inside the input buffer, and is then not stored back.
$code.=<<___;
	teq	$t1,#0				@ check for K256 terminator
	ldr	$t1,[sp,#0]
	sub	$Xfer,$Xfer,#64
	bne	.L_00_48

	ldr		$inp,[sp,#68]
	ldr		$t0,[sp,#72]
	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl
	teq		$inp,$t0
	it		eq
	subeq		$inp,$inp,#64		@ avoid SEGV
	vld1.8		{@X[0]},[$inp]!		@ load next input block
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	it		ne
	strne		$inp,[sp,#68]
	mov		$Xfer,sp
___
	# Four Xpreload passes: more rounds, interleaved with byte-swapping
	# the block just loaded and staging it (plus constants) at [$Xfer].
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
# Epilogue: add the working variables $A..$H to the eight state words
# addressed by $t1 and store them back, then either start over on the
# next block (ne) or restore the original sp and return (eq).
# NOTE(review): $t1 is presumably the context pointer reloaded from
# [sp,#64] by the last round (body_00_15, $j==31), and the ne/eq
# condition presumably still comes from the "teq $inp,$t0" above --
# confirm against the round code.
$code.=<<___;
	ldr	$t0,[$t1,#0]
	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past
	ldr	$t2,[$t1,#4]
	ldr	$t3,[$t1,#8]
	ldr	$t4,[$t1,#12]
	add	$A,$A,$t0			@ accumulate
	ldr	$t0,[$t1,#16]
	add	$B,$B,$t2
	ldr	$t2,[$t1,#20]
	add	$C,$C,$t3
	ldr	$t3,[$t1,#24]
	add	$D,$D,$t4
	ldr	$t4,[$t1,#28]
	add	$E,$E,$t0
	str	$A,[$t1],#4
	add	$F,$F,$t2
	str	$B,[$t1],#4
	add	$G,$G,$t3
	str	$C,[$t1],#4
	add	$H,$H,$t4
	str	$D,[$t1],#4
	stmia	$t1,{$E-$H}

	ittte	ne
	movne	$Xfer,sp
	ldrne	$t1,[sp,#0]
	eorne	$t2,$t2,$t2
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	$t3,$B,$C
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
___
592d9e397b599b13d642138480a28c14db7a136bf0Adam Langley}}}
######################################################################
# ARMv8 code path, built on the sha256h/sha256h2/sha256su0/sha256su1
# instructions
#
{{{
# Register allocation: q0-q2 hold the hash state (ABCD, EFGH) and a
# scratch copy of ABCD, q8-q11 the four message vectors, q12/q13 the
# W+K operands, q14/q15 the state saved at the top of each block.
my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
my @MSG=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3";

# Entry and top of the per-block loop.  INST() lays out the four bytes
# of a hand-encoded instruction (see unsha256); the Thumb-2 variant
# swaps the halfwords and ORs 0xc into the top byte.  The loop loads
# one 64-byte block into @MSG, byte-reverses it, saves the state and
# sets the flags that decide at the bottom whether another block follows.
$code.=<<___;
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{$ABCD,$EFGH},[$ctx]
	sub	$Ktbl,$Ktbl,#256+32
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	4
.Loop_v8:
	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
	vld1.32		{$W0},[$Ktbl]!
	vrev32.8	@MSG[0],@MSG[0]
	vrev32.8	@MSG[1],@MSG[1]
	vrev32.8	@MSG[2],@MSG[2]
	vrev32.8	@MSG[3],@MSG[3]
	vmov		$ABCD_SAVE,$ABCD	@ offload
	vmov		$EFGH_SAVE,$EFGH
	teq		$inp,$len
___
# Twelve groups that also extend the message schedule (sha256su0/su1).
# After each group $W0/$W1 are swapped and @MSG is rotated, so the same
# template addresses the next vector.
for($i=0;$i<12;$i++) {
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	sha256su0	@MSG[0],@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0
	sha256su1	@MSG[0],@MSG[2],@MSG[3]
___
	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
}
# Last four groups need no schedule update.  The 15 loads with address
# write-back have advanced $Ktbl by 15*16 = 256-16 bytes, which the
# "rewind" undoes.  Then the saved state is added back, the loop repeats
# if more input remains (flags from the teq above), and the state is
# finally stored to $ctx.
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vld1.32		{$W0},[$Ktbl]!
	vadd.i32	$W1,$W1,@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vld1.32		{$W1},[$Ktbl]
	vadd.i32	$W0,$W0,@MSG[2]
	sub		$Ktbl,$Ktbl,#256-16	@ rewind
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vadd.i32	$W1,$W1,@MSG[3]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE
	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE
	it		ne
	bne		.Loop_v8

	vst1.32		{$ABCD,$EFGH},[$ctx]

	ret		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
}}}
# Trailer: identification string, then the OPENSSL_armcap_P common
# symbol (hidden), declared under the same preprocessor condition as
# the ARMv8 code path.
$code.=<<___;
.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm   OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
___
690d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Copy this script's leading comment block into the output, turning the
# Perl '#' comment marker into the assembler's '@'.  The shebang line is
# skipped and copying stops at the first line that is neither a comment
# nor empty.  The copy is best-effort: if the script cannot be reopened
# a warning is issued and generation continues without the header.
if (open(my $self, '<', $0)) {
	while (my $line = <$self>) {
		next if ($line =~ /^#!/);
		last if ($line !~ /^#/ and $line !~ /^$/);
		$line =~ s/^#/@/;
		print $line;
	}
	close $self;
} else {
	warn "$0: cannot reopen script to copy its header comment: $!\n";
}
698e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley
# Hand-assembler for the ARMv8 SHA-256 instructions, for toolchains that
# do not know the mnemonics.
#
# unsha256($mnemonic,$arg) takes a mnemonic and its operand text
# ("qD,qN,qM" or "qD,qM") and returns an "INST(b0,b1,b2,b3)" line carrying
# the encoded instruction bytes, least significant byte first, followed
# by the original text as an '@' comment.  Register numbers are merged
# into the base opcode as follows:
#   first operand:	low 3 bits -> bits 13-15, bit 3 -> bit 22
#   middle operand:	low 3 bits -> bits 17-19, bit 3 -> bit 7
#   last operand:	low 3 bits -> bits 1-3,   bit 3 -> bit 5
# If the mnemonic is unknown or the operands are not in one of the two
# forms above, the instruction text is returned unchanged so that the
# assembler gets to see it, instead of it being silently dropped.
{   my  %opcode = (
	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);

    sub unsha256 {
	my ($mnemonic,$arg)=@_;

	if (exists $opcode{$mnemonic} &&
	    $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/) {
	    # the middle register is absent in the two-operand form and
	    # then contributes no bits
	    my ($qd,$qn,$qm) = ($1, defined($2) ? $2 : 0, $3);
	    my $word = $opcode{$mnemonic}|(($qd&7)<<13)|(($qd&8)<<19)
					 |(($qn&7)<<17)|(($qn&8)<<4)
					 |(($qm&7)<<1) |(($qm&8)<<2);
	    # since ARMv7 instructions are always encoded little-endian.
	    # correct solution is to use .inst directive, but older
	    # assemblers don't implement it:-(
	    return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
	}

	return "$mnemonic\t$arg";
    }
}
720d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
# Post-process the accumulated assembly line by line and print it.
foreach (split($/,$code)) {

	# replace every `...` span with the result of evaluating it as Perl
	s/\`([^\`]*)\`/eval $1/ge;

	# hand-encode the sha256* instructions taking q-register operands
	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/ge;

	# "ret" becomes "bx lr"; a "bx lr" that was already in the text is
	# emitted as a raw instruction word instead
	s/\bret\b/bx	lr/g		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/g;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

# Closing flushes the output; a failure here (e.g. disk full) would
# otherwise leave a truncated file behind unnoticed.
close STDOUT or die "error closing STDOUT: $!";	# enforce flush
734