#! /usr/bin/env perl
# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# December 2014
#
# ChaCha20 for ARMv4.
#
# Performance in cycles per byte out of large buffer.
#
#			IALU/gcc-4.4    1xNEON      3xNEON+1xIALU
#
# Cortex-A5		19.3(*)/+95%    21.8        14.1
# Cortex-A8		10.5(*)/+160%   13.9        6.35
# Cortex-A9		12.9(**)/+110%  14.3        6.50
# Cortex-A15		11.0/+40%       16.0        5.00
# Snapdragon S4		11.5/+125%      13.6        4.90
#
# (*)	most "favourable" result for aligned data on little-endian
#	processor, result for misaligned data is 10-15% lower;
# (**)	this result is a trade-off: it can be improved by 20%,
#	but then Snapdragon S4 and Cortex-A8 results get
#	20-25% worse;
364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# Command line: [flavour] [...] output-file
#
# If the first argument already looks like a file name ("name.ext") it is
# taken as the output file and no flavour is set.  Otherwise it is the
# perlasm flavour, and the remaining arguments are scanned until one that
# looks like a file name is found.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # Directory part of this script's path; empty when $0 has none, in
    # which case the translator is looked up relative to the current
    # directory.
    $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Everything printed to STDOUT from here on is piped through the
    # translator, which is run with the same perl binary as this script.
    open STDOUT,"| \"$^X\" $xlate $flavour $output"
	or die "can't call $xlate: $!";
} elsif ($output) {
    # No translation requested: write the generated text straight to the
    # output file.  Without an output file name STDOUT is left untouched.
    open STDOUT,'>',$output or die "can't open $output: $!";
}
514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# Thunk for [simplified] x86-style perlasm.
#
# Any call to an otherwise undefined sub, e.g. &add("r0","r0","r1"), lands
# here and is appended to $code as one tab-separated assembly line:
#   - the sub name (package prefix stripped) is the mnemonic; its first "_"
#     is turned into "." so that e.g. vadd_i32 becomes vadd.i32;
#   - the operands are joined with ",";
#   - a last operand that is a plain number gets a "#" prefix.
sub AUTOLOAD
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
  my $arg = pop;
    # "N*1 eq N" holds only when the operand round-trips through numeric
    # conversion unchanged, i.e. it is a number rather than a register name.
    $arg = "#$arg" if ($arg*1 eq $arg);
    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}
584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# Register maps used by ROUND and by the assembly templates (where @x[N] and
# @t[N] are interpolated into register names).
# @x: ChaCha state word index -> register.  Entries that come out as "rx"
# are placeholders for words that have no register of their own.
my @x=map("r$_",(0..7,"x","x","x","x",12,"x",14,"x"));
# @t: the four remaining registers, r8-r11.
my @t=map("r$_",(8..11));
614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# ROUND(a0,b0,c0,d0) - generate one full ChaCha round (four quarter-rounds).
#
# The arguments are the state-word indices of the first quarter-round.  The
# indices for the other three quarter-rounds are derived by advancing each
# index to the next position inside its own group of four, wrapping around.
#
# Returns a list of strings, each one a perlasm call such as
# "&add	(r0,r0,r4)".  The caller is expected to eval every string so that
# the AUTOLOAD thunk appends the corresponding instruction to $code.
#
# Quarter-rounds are emitted two at a time with their instructions
# interleaved; lines belonging to the second one of a pair carry one extra
# leading space.  Rotations are folded into the xor: "mov x,x,ror#n"
# followed by "eor x,x,y,ror#n" leaves (x ^ y) rotated right by n in x.
sub ROUND {
my ($a0,$b0,$c0,$d0)=@_;
my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));
my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
my $odd = $d0&1;
my ($xc,$xc_) = (@t[0..1]);
# Only one 'd' of each pair has a register in @x; the other one is kept in
# @t[2].  Which is which depends on the parity of the round.
my ($xd,$xd_) = $odd ? ($t[2],$x[$d1]) : ($x[$d0],$t[2]);
my @ret;

	# Consider order in which variables are addressed by their
	# index:
	#
	#       a   b   c   d
	#
	#       0   4   8  12 < even round
	#       1   5   9  13
	#       2   6  10  14
	#       3   7  11  15
	#       0   5  10  15 < odd round
	#       1   6  11  12
	#       2   7   8  13
	#       3   4   9  14
	#
	# 'a', 'b' are permanently allocated in registers, @x[0..7],
	# while 'c's and pair of 'd's are maintained in memory. If
	# you observe 'c' column, you'll notice that pair of 'c's is
	# invariant between rounds. This means that we have to reload
	# them once per round, in the middle. This is why you'll see
	# bunch of 'c' stores and loads in the middle, but none in
	# the beginning or end. If you observe 'd' column, you'll
	# notice that 15 and 13 are reused in next pair of rounds.
	# This is why these two are chosen for offloading to memory,
	# to make loads count more.
							push @ret,(
	"&add	(@x[$a0],@x[$a0],@x[$b0])",
	"&mov	($xd,$xd,'ror#16')",
	 "&add	(@x[$a1],@x[$a1],@x[$b1])",
	 "&mov	($xd_,$xd_,'ror#16')",
	"&eor	($xd,$xd,@x[$a0],'ror#16')",
	 "&eor	($xd_,$xd_,@x[$a1],'ror#16')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b0],@x[$b0],'ror#20')",
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b1],@x[$b1],'ror#20')",
	"&eor	(@x[$b0],@x[$b0],$xc,'ror#20')",
	 "&eor	(@x[$b1],@x[$b1],$xc_,'ror#20')",

	"&add	(@x[$a0],@x[$a0],@x[$b0])",
	"&mov	($xd,$xd,'ror#24')",
	 "&add	(@x[$a1],@x[$a1],@x[$b1])",
	 "&mov	($xd_,$xd_,'ror#24')",
	"&eor	($xd,$xd,@x[$a0],'ror#24')",
	 "&eor	($xd_,$xd_,@x[$a1],'ror#24')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b0],@x[$b0],'ror#25')"		);
	# Swap the memory-resident 'd' held in @t[2] for the one needed by
	# the second pair of quarter-rounds.
							push @ret,(
	"&str	($xd,'[sp,#4*(16+$d0)]')",
	"&ldr	($xd,'[sp,#4*(16+$d2)]')"		) if ($odd);
							push @ret,(
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b1],@x[$b1],'ror#25')"		);
							push @ret,(
	 "&str	($xd_,'[sp,#4*(16+$d1)]')",
	 "&ldr	($xd_,'[sp,#4*(16+$d3)]')"		) if (!$odd);
							push @ret,(
	"&eor	(@x[$b0],@x[$b0],$xc,'ror#25')",
	 "&eor	(@x[$b1],@x[$b1],$xc_,'ror#25')"	);

	# The other 'd' of the second pair is the one with a register in @x.
	$xd=$x[$d2]					if (!$odd);
	$xd_=$x[$d3]					if ($odd);
							push @ret,(
	"&str	($xc,'[sp,#4*(16+$c0)]')",
	"&ldr	($xc,'[sp,#4*(16+$c2)]')",
	"&add	(@x[$a2],@x[$a2],@x[$b2])",
	"&mov	($xd,$xd,'ror#16')",
	 "&str	($xc_,'[sp,#4*(16+$c1)]')",
	 "&ldr	($xc_,'[sp,#4*(16+$c3)]')",
	 "&add	(@x[$a3],@x[$a3],@x[$b3])",
	 "&mov	($xd_,$xd_,'ror#16')",
	"&eor	($xd,$xd,@x[$a2],'ror#16')",
	 "&eor	($xd_,$xd_,@x[$a3],'ror#16')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b2],@x[$b2],'ror#20')",
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b3],@x[$b3],'ror#20')",
	"&eor	(@x[$b2],@x[$b2],$xc,'ror#20')",
	 "&eor	(@x[$b3],@x[$b3],$xc_,'ror#20')",

	"&add	(@x[$a2],@x[$a2],@x[$b2])",
	"&mov	($xd,$xd,'ror#24')",
	 "&add	(@x[$a3],@x[$a3],@x[$b3])",
	 "&mov	($xd_,$xd_,'ror#24')",
	"&eor	($xd,$xd,@x[$a2],'ror#24')",
	 "&eor	($xd_,$xd_,@x[$a3],'ror#24')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b2],@x[$b2],'ror#25')",
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b3],@x[$b3],'ror#25')",
	"&eor	(@x[$b2],@x[$b2],$xc,'ror#25')",
	 "&eor	(@x[$b3],@x[$b3],$xc_,'ror#25')"	);

	@ret;
}
1704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# Assembly template, part 1: file preamble, the .Lsigma/.Lone constants, the
# offset to OPENSSL_armcap_P, and the integer-only ChaCha20_ctr32 entry point
# up to the head of the round loop (.Loop).
#
# This is an interpolating heredoc: @x[N] and @t[N] are replaced by register
# names from the tables above, while "@ " followed by a space starts an
# assembler comment and is emitted as is.
#
# On entry r0-r2 are pushed and later reloaded as out/inp/len, r3 points to
# the key and [sp,#0] holds the pointer to counter and nonce.  When built
# with __ARM_MAX_ARCH__>=7, inputs longer than 192 bytes branch to
# .LChaCha20_neon if the ARMV7_NEON capability bit is set.
$code.=<<___;
#include <openssl/arm_arch.h>

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch  armv7-a

.text
#if defined(__thumb2__) || defined(__clang__)
.syntax	unified
#endif
#if defined(__thumb2__)
.thumb
#else
.code	32
#endif

#if defined(__thumb2__) || defined(__clang__)
#define ldrhsb	ldrbhs
#endif

.align	5
.Lsigma:
.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	@ endian-neutral
.Lone:
.long	1,0,0,0
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word   OPENSSL_armcap_P-.LChaCha20_ctr32
#else
.word	-1
#endif

.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,%function
.align	5
ChaCha20_ctr32:
.LChaCha20_ctr32:
	ldr	r12,[sp,#0]		@ pull pointer to counter and nonce
	stmdb	sp!,{r0-r2,r4-r11,lr}
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r14,pc,#16		@ ChaCha20_ctr32
#else
	adr	r14,.LChaCha20_ctr32
#endif
	cmp	r2,#0			@ len==0?
#ifdef	__thumb2__
	itt	eq
#endif
	addeq	sp,sp,#4*3
	beq	.Lno_data
#if __ARM_MAX_ARCH__>=7
	cmp	r2,#192			@ test len
	bls	.Lshort
	ldr	r4,[r14,#-32]
	ldr	r4,[r14,r4]
# ifdef	__APPLE__
	ldr	r4,[r4]
# endif
	tst	r4,#ARMV7_NEON
	bne	.LChaCha20_neon
.Lshort:
#endif
	ldmia	r12,{r4-r7}		@ load counter and nonce
	sub	sp,sp,#4*(16)		@ off-load area
	sub	r14,r14,#64		@ .Lsigma
	stmdb	sp!,{r4-r7}		@ copy counter and nonce
	ldmia	r3,{r4-r11}		@ load key
	ldmia	r14,{r0-r3}		@ load sigma
	stmdb	sp!,{r4-r11}		@ copy key
	stmdb	sp!,{r0-r3}		@ copy sigma
	str	r10,[sp,#4*(16+10)]	@ off-load "@x[10]"
	str	r11,[sp,#4*(16+11)]	@ off-load "@x[11]"
	b	.Loop_outer_enter

.align	4
.Loop_outer:
	ldmia	sp,{r0-r9}		@ load key material
	str	@t[3],[sp,#4*(32+2)]	@ save len
	str	r12,  [sp,#4*(32+1)]	@ save inp
	str	r14,  [sp,#4*(32+0)]	@ save out
.Loop_outer_enter:
	ldr	@t[3], [sp,#4*(15)]
	ldr	@x[12],[sp,#4*(12)]	@ modulo-scheduled load
	ldr	@t[2], [sp,#4*(13)]
	ldr	@x[14],[sp,#4*(14)]
	str	@t[3], [sp,#4*(16+15)]
	mov	@t[3],#10
	b	.Loop

.align	4
.Loop:
	subs	@t[3],@t[3],#1
___
2654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	foreach (&ROUND(0, 4, 8,12)) { eval; }
2664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	foreach (&ROUND(0, 5,10,15)) { eval; }
2674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___;
2684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bne	.Loop
2694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
2704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	@t[3],[sp,#4*(32+2)]	@ load len
2714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
2724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@t[0], [sp,#4*(16+8)]	@ modulo-scheduled store
2734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@t[1], [sp,#4*(16+9)]
2744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[12],[sp,#4*(16+12)]
2754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@t[2], [sp,#4*(16+13)]
2764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[14],[sp,#4*(16+14)]
2774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
2784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@ at this point we have first half of 512-bit result in
2794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@ @x[0-7] and second half at sp+4*(16+8)
2804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
2814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp	@t[3],#64		@ done yet?
2824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#ifdef	__thumb2__
2834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itete	lo
2844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#endif
2854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	addlo	r12,sp,#4*(0)		@ shortcut or ...
2864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	r12,[sp,#4*(32+1)]	@ ... load inp
2874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	addlo	r14,sp,#4*(0)		@ shortcut or ...
2884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	r14,[sp,#4*(32+0)]	@ ... load out
2894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
2904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	@t[0],[sp,#4*(0)]	@ load key material
2914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	@t[1],[sp,#4*(1)]
2924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
2934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#if __ARM_ARCH__>=6 || !defined(__ARMEB__)
2944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__<7
2954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	orr	@t[2],r12,r14
2964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	tst	@t[2],#3		@ are input and output aligned?
2974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	@t[2],[sp,#4*(2)]
2984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bne	.Lunaligned
2994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp	@t[3],#64		@ restore flags
3004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# else
3014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	@t[2],[sp,#4*(2)]
3024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	@t[3],[sp,#4*(3)]
3044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
3054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[0],@x[0],@t[0]	@ accumulate key material
3064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[1],@x[1],@t[1]
3074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[0],[r12],#16		@ load input
3114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[1],[r12,#-12]
3124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
3134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[2],@x[2],@t[2]
3144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[3],@x[3],@t[3]
3154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[2],[r12,#-8]
3194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[3],[r12,#-4]
3204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__>=6 && defined(__ARMEB__)
3214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[0],@x[0]
3224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[1],@x[1]
3234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[2],@x[2]
3244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[3],@x[3]
3254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[0],@x[0],@t[0]	@ xor with input
3304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[1],@x[1],@t[1]
3314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add	@t[0],sp,#4*(4)
3324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[0],[r14],#16		@ store output
3334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[2],@x[2],@t[2]
3374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[3],@x[3],@t[3]
3384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
3394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[1],[r14,#-12]
3404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[2],[r14,#-8]
3414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[3],[r14,#-4]
3424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
3434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[4],@x[4],@t[0]	@ accumulate key material
3444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[5],@x[5],@t[1]
3454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[0],[r12],#16		@ load input
3494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[1],[r12,#-12]
3504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[6],@x[6],@t[2]
3514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[7],@x[7],@t[3]
3524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[2],[r12,#-8]
3564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[3],[r12,#-4]
3574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__>=6 && defined(__ARMEB__)
3584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[4],@x[4]
3594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[5],@x[5]
3604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[6],@x[6]
3614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[7],@x[7]
3624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[4],@x[4],@t[0]
3674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[5],@x[5],@t[1]
3684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add	@t[0],sp,#4*(8)
3694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[4],[r14],#16		@ store output
3704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[6],@x[6],@t[2]
3744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[7],@x[7],@t[3]
3754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[5],[r14,#-12]
3764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
3774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[6],[r14,#-8]
3784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add	@x[0],sp,#4*(16+8)
3794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[7],[r14,#-4]
3804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
3814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia	@x[0],{@x[0]-@x[7]}	@ load second half
3824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
3834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[0],@x[0],@t[0]	@ accumulate key material
3844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[1],@x[1],@t[1]
3854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[0],[r12],#16		@ load input
3894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[1],[r12,#-12]
3904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hi
3924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
3934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strhi	@t[2],[sp,#4*(16+10)]	@ copy "@x[10]" while at it
3944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strhi	@t[3],[sp,#4*(16+11)]	@ copy "@x[11]" while at it
3954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[2],@x[2],@t[2]
3964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[3],@x[3],@t[3]
3974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
3984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
3994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[2],[r12,#-8]
4014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[3],[r12,#-4]
4024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__>=6 && defined(__ARMEB__)
4034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[0],@x[0]
4044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[1],@x[1]
4054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[2],@x[2]
4064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[3],@x[3]
4074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
4104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[0],@x[0],@t[0]
4124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[1],@x[1],@t[1]
4134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add	@t[0],sp,#4*(12)
4144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[0],[r14],#16		@ store output
4154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
4174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[2],@x[2],@t[2]
4194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[3],@x[3],@t[3]
4204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[1],[r14,#-12]
4214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
4224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[2],[r14,#-8]
4234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[3],[r14,#-4]
4244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
4254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[4],@x[4],@t[0]	@ accumulate key material
4264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[5],@x[5],@t[1]
4274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hi
4294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 addhi	@t[0],@t[0],#1		@ next counter value
4314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strhi	@t[0],[sp,#4*(12)]	@ save next counter value
4324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
4344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[0],[r12],#16		@ load input
4364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[1],[r12,#-12]
4374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[6],@x[6],@t[2]
4384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[7],@x[7],@t[3]
4394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
4414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[2],[r12,#-8]
4434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhs	@t[3],[r12,#-4]
4444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__>=6 && defined(__ARMEB__)
4454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[4],@x[4]
4464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[5],@x[5]
4474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[6],@x[6]
4484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev	@x[7],@x[7]
4494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
4524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[4],@x[4],@t[0]
4544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[5],@x[5],@t[1]
4554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 it	ne
4574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldrne	@t[0],[sp,#4*(32+2)]	@ re-load len
4594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
4614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[6],@x[6],@t[2]
4634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorhs	@x[7],@x[7],@t[3]
4644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[4],[r14],#16		@ store output
4654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[5],[r14,#-12]
4664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	it	hs
4684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 subhs	@t[3],@t[0],#64		@ len-=64
4704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[6],[r14,#-8]
4714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str	@x[7],[r14,#-4]
4724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bhi	.Loop_outer
4734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
4744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	beq	.Ldone
4754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__<7
4764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b	.Ltail
4774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
4784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
4794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Lunaligned:				@ unaligned endian-neutral path
4804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp	@t[3],#64		@ restore flags
4814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#endif
4834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#if __ARM_ARCH__<7
4844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	@t[3],[sp,#4*(3)]
4854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
# Emit the byte-oriented output stage in four passes of four 32-bit words
# each ($i = 0, 4, 8, 12).  Every pass adds the key material held in
# @t[0..3] to four state words, XORs the sums with input bytes (or with zero
# when the "lo" condition holds) and writes the result with byte stores.
4864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminfor ($i=0;$i<16;$i+=4) {
# $j is $i modulo 8 (0 or 4): index of this pass's first word within @x[0..7].
4874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminmy $j=$i&0x7;
4884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# Second pass only: point @x[0] at sp+4*(16+8); the third pass's ldmia
# ("load second half") uses @x[0] as its base address.
4894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___	if ($i==4);
4904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[0],sp,#4*(16+8)
4914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
# Third pass only: load eight words into @x[0..7] and, under the "hi"
# condition, store @t[2] and @t[3] to sp+4*(16+10) and sp+4*(16+11).
4924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___	if ($i==8);
4934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia	@x[0],{@x[0]-@x[7]}		@ load second half
4944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
4954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hi
4964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
4974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	strhi	@t[2],[sp,#4*(16+10)]		@ copy "@x[10]"
4984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	strhi	@t[3],[sp,#4*(16+11)]		@ copy "@x[11]"
4994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
# Every pass: add the first key-material word to the first state word.
5004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___;
5014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[$j+0],@x[$j+0],@t[0]		@ accumulate key material
5024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
# Last pass only: under the "hi" condition, increment @t[0] and store it at
# sp+4*(12).  This is emitted after the add above, so the current block's sum
# is not affected by the increment.
5034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___	if ($i==12);
5044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hi
5064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	addhi	@t[0],@t[0],#1			@ next counter value
5084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	strhi	@t[0],[sp,#4*(12)]		@ save next counter value
5094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
# Every pass: the remaining three adds, then the byte-wise load / XOR / store
# sequence.  Input bytes are fetched with ldrhsb (or the temporaries are
# zeroed with eorlo), and each word is shifted right by 8 between successive
# strb stores, so its bytes are written least-significant first.
5104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___;
5114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[$j+1],@x[$j+1],@t[1]
5124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[$j+2],@x[$j+2],@t[2]
5134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itete	lo
5154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorlo	@t[0],@t[0],@t[0]		@ zero or ...
5174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[0],[r12],#16			@ ... load input
5184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorlo	@t[1],@t[1],@t[1]
5194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[1],[r12,#-12]
5204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
5214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@x[$j+3],@x[$j+3],@t[3]
5224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itete	lo
5244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorlo	@t[2],@t[2],@t[2]
5264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[2],[r12,#-8]
5274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eorlo	@t[3],@t[3],@t[3]
5284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[3],[r12,#-4]
5294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
5304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+0],@t[0],@x[$j+0]		@ xor with input (or zero)
5314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+1],@t[1],@x[$j+1]
5324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
5344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[0],[r12,#-15]		@ load more input
5364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[1],[r12,#-11]
5374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+2],@t[2],@x[$j+2]
5384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+0],[r14],#16		@ store output
5394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+3],@t[3],@x[$j+3]
5404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
5424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[2],[r12,#-7]
5444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[3],[r12,#-3]
5454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+1],[r14,#-12]
5464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+0],@t[0],@x[$j+0],lsr#8
5474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+2],[r14,#-8]
5484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+1],@t[1],@x[$j+1],lsr#8
5494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
5514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[0],[r12,#-14]		@ load more input
5534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[1],[r12,#-10]
5544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+3],[r14,#-4]
5554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+2],@t[2],@x[$j+2],lsr#8
5564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+0],[r14,#-15]
5574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+3],@t[3],@x[$j+3],lsr#8
5584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
5604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[2],[r12,#-6]
5624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[3],[r12,#-2]
5634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+1],[r14,#-11]
5644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+0],@t[0],@x[$j+0],lsr#8
5654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+2],[r14,#-7]
5664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+1],@t[1],@x[$j+1],lsr#8
5674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
5694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[0],[r12,#-13]		@ load more input
5714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[1],[r12,#-9]
5724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+3],[r14,#-3]
5734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+2],@t[2],@x[$j+2],lsr#8
5744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+0],[r14,#-14]
5754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+3],@t[3],@x[$j+3],lsr#8
5764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
5774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	itt	hs
5784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
5794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[2],[r12,#-5]
5804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrhsb	@t[3],[r12,#-1]
5814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+1],[r14,#-10]
5824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+2],[r14,#-6]
5834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+0],@t[0],@x[$j+0],lsr#8
5844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+3],[r14,#-2]
5854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+1],@t[1],@x[$j+1],lsr#8
5864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+0],[r14,#-13]
5874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+2],@t[2],@x[$j+2],lsr#8
5884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+1],[r14,#-9]
5894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@x[$j+3],@t[3],@x[$j+3],lsr#8
5904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+2],[r14,#-5]
5914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strb	@x[$j+3],[r14,#-1]
5924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
# All but the last pass: reload @t[0..3] with the next four key-material
# words from sp+4*(4+$i) for the following pass.
5934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___	if ($i<12);
5944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@t[0],sp,#4*(4+$i)
5954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia	@t[0],{@t[0]-@t[3]}		@ load key material
5964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
5974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin}
# Emit the end of the byte-wise output stage and the function epilogue:
#  - re-load len into @t[0] ("ne"), compute @t[3] = @t[0] - 64 ("hs"), then
#    branch to .Loop_outer on "hi" or to .Ldone on "eq";
#  - the #endif closes the "#if __ARM_ARCH__<7" opened before the loop above;
#  - .Ltail/.Loop_tail: XOR @t[0] input bytes, one at a time, with the bytes
#    read from the buffer starting at sp+4*(0), storing each result to out;
#  - .Ldone releases 4*(32+3) bytes of stack; .Lno_data pops r4-r11 and pc.
5984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___;
5994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
6004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	it	ne
6014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
6024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrne	@t[0],[sp,#4*(32+2)]		@ re-load len
6034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
6044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	it	hs
6054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
6064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	subhs	@t[3],@t[0],#64			@ len-=64
6074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bhi	.Loop_outer
6084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	beq	.Ldone
6104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#endif
6114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ltail:
6134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	r12,[sp,#4*(32+1)]	@ load inp
6144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	@t[1],sp,#4*(0)
6154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr	r14,[sp,#4*(32+0)]	@ load out
6164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_tail:
6184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrb	@t[2],[@t[1]],#1	@ read buffer on stack
6194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrb	@t[3],[r12],#1		@ read input
6204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	subs	@t[0],@t[0],#1
6214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor	@t[3],@t[3],@t[2]
6224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	strb	@t[3],[r14],#1		@ store output
6234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bne	.Loop_tail
6244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ldone:
6264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add	sp,sp,#4*(32+3)
6274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Lno_data:
6284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia	sp!,{r4-r11,pc}
6294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.size	ChaCha20_ctr32,.-ChaCha20_ctr32
6304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
6314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin{{{
# Symbolic names for the sixteen NEON quad registers q0..q15, assigned in
# order: three groups of a/b/c/d registers (suffixes 0, 1, 2) followed by
# the four temporaries $t0..$t3.
6334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminmy ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$t0,$t1,$t2,$t3) =
6344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin    map("q$_",(0..15));
6354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# NEONROUND(a, b, c, d, t, odd)
#
# Builds, but does not execute, the instruction sequence for one round over
# the four NEON registers named by a, b, c and d, with t as scratch.  The
# return value is a list of 18 code strings, one per instruction, which the
# caller evals one at a time so they can be interleaved with other streams.
#
# The last argument is copied textually into the two final vext_8 strings as
# "odd?12:4" / "odd?4:12", so the rotation amount is only chosen when the
# string is eval'ed.
sub NEONROUND {
my $odd = pop(@_);
my ($va,$vb,$vc,$vd,$tmp) = @_;

	return (
	# a += b; d ^= a; rotate d by 16 via a halfword swap
	"&vadd_i32	($va,$va,$vb)",
	"&veor		($vd,$vd,$va)",
	"&vrev32_16	($vd,$vd)",

	# c += d; b ^= c; rotate b left by 12 (shift right 20, insert left 12)
	"&vadd_i32	($vc,$vc,$vd)",
	"&veor		($tmp,$vb,$vc)",
	"&vshr_u32	($vb,$tmp,20)",
	"&vsli_32	($vb,$tmp,12)",

	# a += b; d ^= a; rotate d left by 8
	"&vadd_i32	($va,$va,$vb)",
	"&veor		($tmp,$vd,$va)",
	"&vshr_u32	($vd,$tmp,24)",
	"&vsli_32	($vd,$tmp,8)",

	# c += d; b ^= c; rotate b left by 7
	"&vadd_i32	($vc,$vc,$vd)",
	"&veor		($tmp,$vb,$vc)",
	"&vshr_u32	($vb,$tmp,25)",
	"&vsli_32	($vb,$tmp,7)",

	# rotate the 32-bit lanes of c, b and d by whole words
	"&vext_8	($vc,$vc,$vc,8)",
	"&vext_8	($vb,$vb,$vb,$odd?12:4)",
	"&vext_8	($vd,$vd,$vd,$odd?4:12)"
	);
}
6654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# Emit the start of ChaCha20_neon (built only when __ARM_MAX_ARCH__>=7):
#  - prologue: save r0-r2,r4-r11,lr and d8-d15, push r0-r3, load the key,
#    counter/nonce and sigma into both NEON and integer registers, reserve
#    4*(16+16) bytes of stack and copy the state there; the doubleword
#    constants commented "one", "two" and "four" are stored at
#    sp+4*(16+0), sp+4*(16+2) and sp+4*(16+4);
#  - .Loop_neon_outer: reload key material into r0-r9, branch to .Lbreak_neon
#    when @t[3] (len) is <= 64*2, otherwise save len/inp/out on the stack;
#  - .Loop_neon_enter: set up the counter+1 and counter+2 NEON copies, add 3
#    to the integer counter word @x[12], and preset the loop counter @t[3]
#    to 10;
#  - the heredoc ends on the first instruction of .Loop_neon (the subs that
#    decrements that counter); the loop body is generated by the Perl code
#    that follows.
6664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___;
6674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#if __ARM_MAX_ARCH__>=7
6684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.arch	armv7-a
6694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.fpu	neon
6704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.type	ChaCha20_neon,%function
6724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	5
6734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid BenjaminChaCha20_neon:
6744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		r12,[sp,#0]		@ pull pointer to counter and nonce
6754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	stmdb		sp!,{r0-r2,r4-r11,lr}
6764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.LChaCha20_neon:
6774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	adr		r14,.Lsigma
6784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vstmdb		sp!,{d8-d15}		@ ABI spec says so
6794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	stmdb		sp!,{r0-r3}
6804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.32		{$b0-$c0},[r3]		@ load key
6824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		r3,{r4-r11}		@ load key
6834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	sub		sp,sp,#4*(16+16)
6854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.32		{$d0},[r12]		@ load counter and nonce
6864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		r12,sp,#4*8
6874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		r14,{r0-r3}		@ load sigma
6884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.32		{$a0},[r14]!		@ load sigma
6894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.32		{$t0},[r14]		@ one
6904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.32		{$c0-$d0},[r12]		@ copy 1/2key|counter|nonce
6914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.32		{$a0-$b0},[sp]		@ copy sigma|1/2key
6924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
6934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		r10,[sp,#4*(16+10)]	@ off-load "@x[10]"
6944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		r11,[sp,#4*(16+11)]	@ off-load "@x[11]"
6954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vshl.i32	$t1#lo,$t0#lo,#1	@ two
6964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vstr		$t0#lo,[sp,#4*(16+0)]
6974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vshl.i32	$t2#lo,$t0#lo,#2	@ four
6984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vstr		$t1#lo,[sp,#4*(16+2)]
6994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$a1,$a0
7004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vstr		$t2#lo,[sp,#4*(16+4)]
7014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$a2,$a0
7024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$b1,$b0
7034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$b2,$b0
7044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b		.Loop_neon_enter
7054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
7074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_neon_outer:
7084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		sp,{r0-r9}		@ load key material
7094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp		@t[3],#64*2		@ if len<=64*2
7104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bls		.Lbreak_neon		@ switch to integer-only
7114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$a1,$a0
7124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@t[3],[sp,#4*(32+2)]	@ save len
7134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$a2,$a0
7144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		r12,  [sp,#4*(32+1)]	@ save inp
7154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$b1,$b0
7164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		r14,  [sp,#4*(32+0)]	@ save out
7174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$b2,$b0
7184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_neon_enter:
7194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[3], [sp,#4*(15)]
7204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d1,$d0,$t0		@ counter+1
7214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@x[12],[sp,#4*(12)]	@ modulo-scheduled load
7224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$c1,$c0
7234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[2], [sp,#4*(13)]
7244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vmov		$c2,$c0
7254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@x[14],[sp,#4*(14)]
7264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d2,$d1,$t0		@ counter+2
7274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@t[3], [sp,#4*(16+15)]
7284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	mov		@t[3],#10
729a94fe0531b3c196ad078174259af2201b2e3a246Robert Sloan	add		@x[12],@x[12],#3	@ counter+3
7304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b		.Loop_neon
7314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
7334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_neon:
7344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	subs		@t[3],@t[3],#1
7354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
	# Generate the body of .Loop_neon: one round built with a trailing
	# NEONROUND argument of 0 and ROUND(0,4,8,12), then one built with a
	# trailing argument of 1 and ROUND(0,5,10,15).  Three streams of code
	# strings come from NEONROUND (one per NEON register group) and a fourth
	# from ROUND, which is defined outside this section.
	# NOTE(review): eval'ing a string presumably appends one instruction to
	# $code via the file's emitter helpers -- confirm against their definition.
7364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	my @thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,0);
7374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	my @thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,0);
7384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	my @thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,0);
7394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	my @thread3=&ROUND(0,4,8,12);
7404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
	# Interleave the streams: for each entry of @thread0, evaluate one entry
	# from each NEON stream, each followed by the next entry of @thread3.
	# The evaluation order fixes the instruction order in the output.
7414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	foreach (@thread0) {
7424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin		eval;			eval(shift(@thread3));
7434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin		eval(shift(@thread1));	eval(shift(@thread3));
7444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin		eval(shift(@thread2));	eval(shift(@thread3));
7454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	}
7464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
	# Second round of the iteration: same registers, trailing argument 1, and
	# ROUND called with (0,5,10,15).
7474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,1);
7484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,1);
7494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,1);
7504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@thread3=&ROUND(0,5,10,15);
7514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	foreach (@thread0) {
7534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin		eval;			eval(shift(@thread3));
7544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin		eval(shift(@thread1));	eval(shift(@thread3));
7554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin		eval(shift(@thread2));	eval(shift(@thread3));
7564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	}
7574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___;
7584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bne		.Loop_neon
7594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[3],sp,#32
7614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.32		{$t0-$t1},[sp]		@ load key material
7624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.32		{$t2-$t3},[@t[3]]
7634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[3],[sp,#4*(32+2)]	@ load len
7654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@t[0], [sp,#4*(16+8)]	@ modulo-scheduled store
7674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@t[1], [sp,#4*(16+9)]
7684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[12],[sp,#4*(16+12)]
7694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@t[2], [sp,#4*(16+13)]
7704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[14],[sp,#4*(16+14)]
7714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@ at this point we have first half of 512-bit result in
7734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@ @x[0-7] and second half at sp+4*(16+8)
7744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		r12,[sp,#4*(32+1)]	@ load inp
7764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		r14,[sp,#4*(32+0)]	@ load out
7774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$a0,$a0,$t0		@ accumulate key material
7794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$a1,$a1,$t0
7804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$a2,$a2,$t0
7814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vldr		$t0#lo,[sp,#4*(16+0)]	@ one
7824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$b0,$b0,$t1
7844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$b1,$b1,$t1
7854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$b2,$b2,$t1
7864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vldr		$t1#lo,[sp,#4*(16+2)]	@ two
7874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$c0,$c0,$t2
7894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$c1,$c1,$t2
7904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$c2,$c2,$t2
7914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d1#lo,$d1#lo,$t0#lo	@ counter+1
7924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d2#lo,$d2#lo,$t1#lo	@ counter+2
7934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d0,$d0,$t3
7954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d1,$d1,$t3
7964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d2,$d2,$t3
7974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
7984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp		@t[3],#64*4
7994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	blo		.Ltail_neon
8004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!	@ load input
8024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 mov		@t[3],sp
8034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
8044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a0,$a0,$t0		@ xor with input
8054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b0,$b0,$t1
8064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
8074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c0,$c0,$t2
8084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d0,$d0,$t3
8094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
8104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a1,$a1,$t0
8124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$a0-$b0},[r14]!	@ store output
8134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b1,$b1,$t1
8144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
8154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c1,$c1,$t2
8164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$c0-$d0},[r14]!
8174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d1,$d1,$t3
8184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
8194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a2,$a2,$t0
8214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vld1.32	{$a0-$b0},[@t[3]]!	@ load for next iteration
8224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 veor		$t0#hi,$t0#hi,$t0#hi
8234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vldr		$t0#lo,[sp,#4*(16+4)]	@ four
8244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b2,$b2,$t1
8254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vld1.32	{$c0-$d0},[@t[3]]
8264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c2,$c2,$t2
8274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$a1-$b1},[r14]!
8284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d2,$d2,$t3
8294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$c1-$d1},[r14]!
8304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vadd.i32	$d0#lo,$d0#lo,$t0#lo	@ next counter value
8324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vldr		$t0#lo,[sp,#4*(16+0)]	@ one
8334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		sp,{@t[0]-@t[3]}	@ load key material
8354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[0],@x[0],@t[0]	@ accumulate key material
8364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[0],[r12],#16		@ load input
8374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$a2-$b2},[r14]!
8384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[1],@x[1],@t[1]
8394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[1],[r12,#-12]
8404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$c2-$d2},[r14]!
8414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[2],@x[2],@t[2]
8424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[2],[r12,#-8]
8434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[3],@x[3],@t[3]
8444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[3],[r12,#-4]
8454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__ARMEB__
8464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[0],@x[0]
8474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[1],@x[1]
8484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[2],@x[2]
8494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[3],@x[3]
8504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
8514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[0],@x[0],@t[0]	@ xor with input
8524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],sp,#4*(4)
8534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[1],@x[1],@t[1]
8544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[0],[r14],#16		@ store output
8554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[2],@x[2],@t[2]
8564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[1],[r14,#-12]
8574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[3],@x[3],@t[3]
8584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
8594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[2],[r14,#-8]
8604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[3],[r14,#-4]
8614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[4],@x[4],@t[0]	@ accumulate key material
8634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[0],[r12],#16		@ load input
8644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[5],@x[5],@t[1]
8654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[1],[r12,#-12]
8664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[6],@x[6],@t[2]
8674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[2],[r12,#-8]
8684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[7],@x[7],@t[3]
8694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[3],[r12,#-4]
8704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__ARMEB__
8714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[4],@x[4]
8724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[5],@x[5]
8734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[6],@x[6]
8744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[7],@x[7]
8754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
8764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[4],@x[4],@t[0]
8774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],sp,#4*(8)
8784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[5],@x[5],@t[1]
8794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[4],[r14],#16		@ store output
8804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[6],@x[6],@t[2]
8814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[5],[r14,#-12]
8824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[7],@x[7],@t[3]
8834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
8844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[6],[r14,#-8]
8854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@x[0],sp,#4*(16+8)
8864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[7],[r14,#-4]
8874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		@x[0],{@x[0]-@x[7]}	@ load second half
8894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
8904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[0],@x[0],@t[0]	@ accumulate key material
8914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[0],[r12],#16		@ load input
8924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[1],@x[1],@t[1]
8934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[1],[r12,#-12]
8944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
8954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	it	hi
8964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
8974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strhi		@t[2],[sp,#4*(16+10)]	@ copy "@x[10]" while at it
8984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[2],@x[2],@t[2]
8994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[2],[r12,#-8]
9004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
9014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	it	hi
9024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
9034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 strhi		@t[3],[sp,#4*(16+11)]	@ copy "@x[11]" while at it
9044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[3],@x[3],@t[3]
9054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[3],[r12,#-4]
9064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__ARMEB__
9074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[0],@x[0]
9084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[1],@x[1]
9094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[2],@x[2]
9104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[3],@x[3]
9114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
9124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[0],@x[0],@t[0]
9134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],sp,#4*(12)
9144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[1],@x[1],@t[1]
9154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[0],[r14],#16		@ store output
9164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[2],@x[2],@t[2]
9174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[1],[r14,#-12]
9184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[3],@x[3],@t[3]
9194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
9204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[2],[r14,#-8]
9214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[3],[r14,#-4]
9224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[4],@x[4],@t[0]	@ accumulate key material
9244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],@t[0],#4		@ next counter value
9254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[5],@x[5],@t[1]
9264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 str		@t[0],[sp,#4*(12)]	@ save next counter value
9274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[0],[r12],#16		@ load input
9284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[6],@x[6],@t[2]
9294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@x[4],@x[4],#3		@ counter+3
9304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[1],[r12,#-12]
9314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[7],@x[7],@t[3]
9324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[2],[r12,#-8]
9334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[3],[r12,#-4]
9344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__ARMEB__
9354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[4],@x[4]
9364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[5],@x[5]
9374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[6],@x[6]
9384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[7],@x[7]
9394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
9404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[4],@x[4],@t[0]
9414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__thumb2__
9424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	it	hi
9434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
9444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldrhi		@t[0],[sp,#4*(32+2)]	@ re-load len
9454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[5],@x[5],@t[1]
9464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[6],@x[6],@t[2]
9474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[4],[r14],#16		@ store output
9484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@x[7],@x[7],@t[3]
9494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[5],[r14,#-12]
9504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 sub		@t[3],@t[0],#64*4	@ len-=64*4
9514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[6],[r14,#-8]
9524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[7],[r14,#-4]
9534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bhi		.Loop_neon_outer
9544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b		.Ldone_neon
9564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
9584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Lbreak_neon:
9594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@ harmonize NEON and integer-only stack frames: load data
9604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@ from NEON frame, but save to integer-only one; distance
9614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	@ between the two is 4*(32+4+16-32)=4*(20).
9624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@t[3], [sp,#4*(20+32+2)]	@ save len
9644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[3],sp,#4*(32+4)
9654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		r12,   [sp,#4*(20+32+1)]	@ save inp
9664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		r14,   [sp,#4*(20+32+0)]	@ save out
9674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@x[12],[sp,#4*(16+10)]
9694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@x[14],[sp,#4*(16+11)]
9704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vldmia		@t[3],{d8-d15}			@ fulfill ABI requirement
9714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[12],[sp,#4*(20+16+10)]	@ copy "@x[10]"
9724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@x[14],[sp,#4*(20+16+11)]	@ copy "@x[11]"
9734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[3], [sp,#4*(15)]
9754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@x[12],[sp,#4*(12)]		@ modulo-scheduled load
9764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@t[2], [sp,#4*(13)]
9774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldr		@x[14],[sp,#4*(14)]
9784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	str		@t[3], [sp,#4*(20+16+15)]
9794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[3],sp,#4*(20)
9804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.32		{$a0-$b0},[@t[3]]!		@ copy key
9814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		sp,sp,#4*(20)			@ switch frame
9824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.32		{$c0-$d0},[@t[3]]
9834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	mov		@t[3],#10
9844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b		.Loop				@ go integer-only
9854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
9874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ltail_neon:
9884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp		@t[3],#64*3
9894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bhs		.L192_or_more_neon
9904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp		@t[3],#64*2
9914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bhs		.L128_or_more_neon
9924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	cmp		@t[3],#64*1
9934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bhs		.L64_or_more_neon
9944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
9954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[0],sp,#4*(8)
9964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$a0-$b0},[sp]
9974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[2],sp,#4*(0)
9984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$c0-$d0},[@t[0]]
9994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b		.Loop_tail_neon
10004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
10024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.L64_or_more_neon:
10034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
10044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
10054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a0,$a0,$t0
10064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b0,$b0,$t1
10074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c0,$c0,$t2
10084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d0,$d0,$t3
10094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$a0-$b0},[r14]!
10104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$c0-$d0},[r14]!
10114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	beq		.Ldone_neon
10134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[0],sp,#4*(8)
10154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$a1-$b1},[sp]
10164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[2],sp,#4*(0)
10174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$c1-$d1},[@t[0]]
10184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	sub		@t[3],@t[3],#64*1	@ len-=64*1
10194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b		.Loop_tail_neon
10204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
10224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.L128_or_more_neon:
10234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
10244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
10254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a0,$a0,$t0
10264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b0,$b0,$t1
10274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
10284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c0,$c0,$t2
10294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d0,$d0,$t3
10304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
10314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a1,$a1,$t0
10334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b1,$b1,$t1
10344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$a0-$b0},[r14]!
10354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c1,$c1,$t2
10364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$c0-$d0},[r14]!
10374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d1,$d1,$t3
10384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$a1-$b1},[r14]!
10394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$c1-$d1},[r14]!
10404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	beq		.Ldone_neon
10424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[0],sp,#4*(8)
10444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$a2-$b2},[sp]
10454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@t[2],sp,#4*(0)
10464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$c2-$d2},[@t[0]]
10474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	sub		@t[3],@t[3],#64*2	@ len-=64*2
10484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	b		.Loop_tail_neon
10494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align	4
10514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.L192_or_more_neon:
10524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
10534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
10544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a0,$a0,$t0
10554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b0,$b0,$t1
10564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
10574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c0,$c0,$t2
10584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d0,$d0,$t3
10594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
10604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a1,$a1,$t0
10624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b1,$b1,$t1
10634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t0-$t1},[r12]!
10644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c1,$c1,$t2
10654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$a0-$b0},[r14]!
10664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d1,$d1,$t3
10674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vld1.8		{$t2-$t3},[r12]!
10684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$a2,$a2,$t0
10704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$c0-$d0},[r14]!
10714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$b2,$b2,$t1
10724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$a1-$b1},[r14]!
10734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$c2,$c2,$t2
10744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 vst1.8		{$c1-$d1},[r14]!
10754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	veor		$d2,$d2,$t3
10764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$a2-$b2},[r14]!
10774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vst1.8		{$c2-$d2},[r14]!
10784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	beq		.Ldone_neon
10804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		sp,{@t[0]-@t[3]}	@ load key material
10824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[0],@x[0],@t[0]	@ accumulate key material
10834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],sp,#4*(4)
10844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[1],@x[1],@t[1]
10854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[2],@x[2],@t[2]
10864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[3],@x[3],@t[3]
10874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
10884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
10894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[4],@x[4],@t[0]	@ accumulate key material
10904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],sp,#4*(8)
10914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[5],@x[5],@t[1]
10924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[6],@x[6],@t[2]
10934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[7],@x[7],@t[3]
10944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
10954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__ARMEB__
10964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[0],@x[0]
10974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[1],@x[1]
10984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[2],@x[2]
10994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[3],@x[3]
11004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[4],@x[4]
11014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[5],@x[5]
11024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[6],@x[6]
11034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[7],@x[7]
11044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
11054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	stmia		sp,{@x[0]-@x[7]}
11064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@x[0],sp,#4*(16+8)
11074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
11084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		@x[0],{@x[0]-@x[7]}	@ load second half
11094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
11104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[0],@x[0],@t[0]	@ accumulate key material
11114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],sp,#4*(12)
11124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[1],@x[1],@t[1]
11134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[2],@x[2],@t[2]
11144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[3],@x[3],@t[3]
11154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldmia		@t[0],{@t[0]-@t[3]}	@ load key material
11164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
11174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[4],@x[4],@t[0]	@ accumulate key material
11184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[0],sp,#4*(8)
11194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[5],@x[5],@t[1]
11204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@x[4],@x[4],#3		@ counter+3
11214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[6],@x[6],@t[2]
11224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		@x[7],@x[7],@t[3]
11234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 ldr		@t[3],[sp,#4*(32+2)]	@ re-load len
11244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef	__ARMEB__
11254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[0],@x[0]
11264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[1],@x[1]
11274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[2],@x[2]
11284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[3],@x[3]
11294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[4],@x[4]
11304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[5],@x[5]
11314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[6],@x[6]
11324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	rev		@x[7],@x[7]
11334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif
11344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	stmia		@t[0],{@x[0]-@x[7]}
11354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 add		@t[2],sp,#4*(0)
11364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	 sub		@t[3],@t[3],#64*3	@ len-=64*3
11374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
11384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_tail_neon:
11394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrb		@t[0],[@t[2]],#1	@ read buffer on stack
11404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldrb		@t[1],[r12],#1		@ read input
11414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	subs		@t[3],@t[3],#1
11424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	eor		@t[0],@t[0],@t[1]
1143a94fe0531b3c196ad078174259af2201b2e3a246Robert Sloan	strb		@t[0],[r14],#1		@ store output
11444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	bne		.Loop_tail_neon
11454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
11464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ldone_neon:
11474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		sp,sp,#4*(32+4)
11484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	vldmia		sp,{d8-d15}
11494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	add		sp,sp,#4*(16+3)
11504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin	ldmia		sp!,{r4-r11,pc}
11514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.size	ChaCha20_neon,.-ChaCha20_neon
11524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.comm	OPENSSL_armcap_P,4,4
11534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#endif
11544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___
11554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin}}}
11564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin
# Post-process the assembly template accumulated in $code and write it to
# STDOUT one line at a time.
foreach (split("\n",$code)) {
	# Replace every `...` span with the result of evaluating the enclosed
	# text as a Perl expression.
	s/\`([^\`]*)\`/eval $1/geo;

	# Rewrite a quad-register half as the matching double register:
	# qN#lo -> d(2N), qN#hi -> d(2N+1).
	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo;

	print $_,"\n";
}
# Output is buffered, so write failures (full disk, closed pipe) may only be
# reported when the handle is closed. Die here so a truncated assembly file
# is never mistaken for a successful run.
close STDOUT or die "error closing STDOUT: $!";
1165