#! /usr/bin/env perl
# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# December 2014
#
# ChaCha20 for ARMv4.
#
# Performance in cycles per byte out of large buffer.
#
#                   IALU/gcc-4.4    1xNEON      3xNEON+1xIALU
#
# Cortex-A5         19.3(*)/+95%    21.8        14.1
# Cortex-A8         10.5(*)/+160%   13.9        6.35
# Cortex-A9         12.9(**)/+110%  14.3        6.50
# Cortex-A15        11.0/+40%       16.0        5.00
# Snapdragon S4     11.5/+125%      13.6        4.90
#
# (*)   most "favourable" result for aligned data on little-endian
#       processor, result for misaligned data is 10-15% lower;
# (**)  this result is a trade-off: it can be improved by 20%,
#       but then Snapdragon S4 and Cortex-A8 results get
#       20-25% worse;

# Command line: [flavour] [...] [output-file].  The first argument is the
# flavour unless it already looks like a file name ("name.ext"), in which
# case it is the output file and there is no flavour.  Otherwise the
# remaining arguments are scanned until one looks like a file name.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # Locate arm-xlate.pl next to this script or under ../../perlasm/.
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Everything printed to STDOUT is piped through the translator.
    # Fail loudly if the pipe cannot be started instead of silently
    # producing no output.
    open STDOUT,"| \"$^X\" $xlate $flavour $output"
        or die "can't call $xlate: $!";
} elsif (defined $output && $output ne "") {
    # No translation requested: write straight to the named file.
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}
# With neither a flavour nor an output name, STDOUT is left untouched.

# Catch-all for undefined subs: a call such as &add("r0","r0","r1") appends
# the line "\tadd\tr0,r0,r1\n" to $code.  The package qualifier is stripped
# from the sub name, its first underscore becomes a dot, and a purely
# numeric last argument is prefixed with '#'.
sub AUTOLOAD()		# thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
  my $arg = pop;
    $arg = "#$arg" if ($arg*1 eq $arg);
    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}

# Register names indexed by state word.  Entries that map to "rx" (words
# 8-11, 13 and 15) are placeholders; @t supplies r8-r11 as temporaries.
my @x=map("r$_",(0..7,"x","x","x","x",12,"x",14,"x"));
my @t=map("r$_",(8..11));

# ROUND(a0,b0,c0,d0): takes the four state-word indices of the first
# column/diagonal, derives the other three by stepping each index within
# its group of four, and returns a list of strings.  Each string is a Perl
# call to an opcode thunk; the caller eval()s them to append the
# instructions to $code.
sub ROUND {
my ($a0,$b0,$c0,$d0)=@_;
my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));
my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
my $odd = $d0&1;
my ($xc,$xc_) = (@t[0..1]);
my ($xd,$xd_) = $odd ? (@t[2],@x[$d1]) : (@x[$d0],@t[2]);
my @ret;

	# Consider order in which variables are addressed by their
	# index:
	#
	#       a   b   c   d
	#
	#       0   4   8  12 < even round
	#       1   5   9  13
	#       2   6  10  14
	#       3   7  11  15
	#       0   5  10  15 < odd round
	#       1   6  11  12
	#       2   7   8  13
	#       3   4   9  14
	#
	# 'a', 'b' are permanently allocated in registers, @x[0..7],
	# while 'c's and pair of 'd's are maintained in memory. If
	# you observe 'c' column, you'll notice that pair of 'c's is
	# invariant between rounds. This means that we have to reload
	# them once per round, in the middle. This is why you'll see
	# bunch of 'c' stores and loads in the middle, but none in
	# the beginning or end. If you observe 'd' column, you'll
	# notice that 15 and 13 are reused in next pair of rounds.
	# This is why these two are chosen for offloading to memory,
	# to make loads count more.
							push @ret,(
	"&add	(@x[$a0],@x[$a0],@x[$b0])",
	"&mov	($xd,$xd,'ror#16')",
	 "&add	(@x[$a1],@x[$a1],@x[$b1])",
	 "&mov	($xd_,$xd_,'ror#16')",
	"&eor	($xd,$xd,@x[$a0],'ror#16')",
	 "&eor	($xd_,$xd_,@x[$a1],'ror#16')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b0],@x[$b0],'ror#20')",
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b1],@x[$b1],'ror#20')",
	"&eor	(@x[$b0],@x[$b0],$xc,'ror#20')",
	 "&eor	(@x[$b1],@x[$b1],$xc_,'ror#20')",

	"&add	(@x[$a0],@x[$a0],@x[$b0])",
	"&mov	($xd,$xd,'ror#24')",
	 "&add	(@x[$a1],@x[$a1],@x[$b1])",
	 "&mov	($xd_,$xd_,'ror#24')",
	"&eor	($xd,$xd,@x[$a0],'ror#24')",
	 "&eor	($xd_,$xd_,@x[$a1],'ror#24')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b0],@x[$b0],'ror#25')"		);
							push @ret,(
	"&str	($xd,'[sp,#4*(16+$d0)]')",
	"&ldr	($xd,'[sp,#4*(16+$d2)]')"		) if ($odd);
							push @ret,(
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b1],@x[$b1],'ror#25')"		);
							push @ret,(
	 "&str	($xd_,'[sp,#4*(16+$d1)]')",
	 "&ldr	($xd_,'[sp,#4*(16+$d3)]')"		) if (!$odd);
							push @ret,(
	"&eor	(@x[$b0],@x[$b0],$xc,'ror#25')",
	 "&eor	(@x[$b1],@x[$b1],$xc_,'ror#25')"	);

	$xd=@x[$d2]					if (!$odd);
	$xd_=@x[$d3]					if ($odd);
							push @ret,(
	"&str	($xc,'[sp,#4*(16+$c0)]')",
	"&ldr	($xc,'[sp,#4*(16+$c2)]')",
	"&add	(@x[$a2],@x[$a2],@x[$b2])",
	"&mov	($xd,$xd,'ror#16')",
	 "&str	($xc_,'[sp,#4*(16+$c1)]')",
	 "&ldr	($xc_,'[sp,#4*(16+$c3)]')",
	 "&add	(@x[$a3],@x[$a3],@x[$b3])",
	 "&mov	($xd_,$xd_,'ror#16')",
	"&eor	($xd,$xd,@x[$a2],'ror#16')",
	 "&eor	($xd_,$xd_,@x[$a3],'ror#16')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b2],@x[$b2],'ror#20')",
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b3],@x[$b3],'ror#20')",
	"&eor	(@x[$b2],@x[$b2],$xc,'ror#20')",
	 "&eor	(@x[$b3],@x[$b3],$xc_,'ror#20')",

	"&add	(@x[$a2],@x[$a2],@x[$b2])",
	"&mov	($xd,$xd,'ror#24')",
	 "&add	(@x[$a3],@x[$a3],@x[$b3])",
	 "&mov	($xd_,$xd_,'ror#24')",
	"&eor	($xd,$xd,@x[$a2],'ror#24')",
	 "&eor	($xd_,$xd_,@x[$a3],'ror#24')",

	"&add	($xc,$xc,$xd)",
	"&mov	(@x[$b2],@x[$b2],'ror#25')",
	 "&add	($xc_,$xc_,$xd_)",
	 "&mov	(@x[$b3],@x[$b3],'ror#25')",
	"&eor	(@x[$b2],@x[$b2],$xc,'ror#25')",
	 "&eor	(@x[$b3],@x[$b3],$xc_,'ror#25')"	);

	@ret;
}

$code.=<<___;
#include <openssl/arm_arch.h>

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch	armv7-a

.text
#if defined(__thumb2__) || defined(__clang__)
.syntax	unified
#endif
#if defined(__thumb2__)
.thumb
#else
.code	32
#endif

#if defined(__thumb2__) || defined(__clang__)
#define ldrhsb	ldrbhs
#endif

.align	5
.Lsigma:
.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	@ endian-neutral
.Lone:
.long	1,0,0,0
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.LChaCha20_ctr32
#else
.word	-1
#endif

.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,%function
.align	5
ChaCha20_ctr32:
.LChaCha20_ctr32:
	ldr	r12,[sp,#0]		@ pull pointer to counter and nonce
	stmdb	sp!,{r0-r2,r4-r11,lr}
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r14,pc,#16		@ ChaCha20_ctr32
#else
	adr	r14,.LChaCha20_ctr32
#endif
	cmp	r2,#0			@ len==0?
#ifdef __thumb2__
	itt	eq
#endif
	addeq	sp,sp,#4*3
	beq	.Lno_data
#if __ARM_MAX_ARCH__>=7
	cmp	r2,#192			@ test len
	bls	.Lshort
	ldr	r4,[r14,#-32]
	ldr	r4,[r14,r4]
# ifdef __APPLE__
	ldr	r4,[r4]
# endif
	tst	r4,#ARMV7_NEON
	bne	.LChaCha20_neon
.Lshort:
#endif
	ldmia	r12,{r4-r7}		@ load counter and nonce
	sub	sp,sp,#4*(16)		@ off-load area
	sub	r14,r14,#64		@ .Lsigma
	stmdb	sp!,{r4-r7}		@ copy counter and nonce
	ldmia	r3,{r4-r11}		@ load key
	ldmia	r14,{r0-r3}		@ load sigma
	stmdb	sp!,{r4-r11}		@ copy key
	stmdb	sp!,{r0-r3}		@ copy sigma
	str	r10,[sp,#4*(16+10)]	@ off-load "@x[10]"
	str	r11,[sp,#4*(16+11)]	@ off-load "@x[11]"
	b	.Loop_outer_enter

.align	4
.Loop_outer:
	ldmia	sp,{r0-r9}		@ load key material
	str	@t[3],[sp,#4*(32+2)]	@ save len
	str	r12, [sp,#4*(32+1)]	@ save inp
	str	r14, [sp,#4*(32+0)]	@ save out
.Loop_outer_enter:
	ldr	@t[3], [sp,#4*(15)]
	ldr	@x[12],[sp,#4*(12)]	@ modulo-scheduled load
	ldr	@t[2], [sp,#4*(13)]
	ldr	@x[14],[sp,#4*(14)]
	str	@t[3], [sp,#4*(16+15)]
	mov	@t[3],#10
	b	.Loop

.align	4
.Loop:
	subs	@t[3],@t[3],#1
___
	foreach (&ROUND(0, 4, 8,12)) { eval; }
	foreach (&ROUND(0, 5,10,15)) { eval; }
$code.=<<___;
	bne	.Loop

	ldr	@t[3],[sp,#4*(32+2)]	@ load len

	str	@t[0], [sp,#4*(16+8)]	@ modulo-scheduled store
	str	@t[1], [sp,#4*(16+9)]
	str	@x[12],[sp,#4*(16+12)]
	str	@t[2], [sp,#4*(16+13)]
	str	@x[14],[sp,#4*(16+14)]

	@ at this point we have first half of 512-bit result in
	@ @x[0-7] and second half at sp+4*(16+8)

	cmp	@t[3],#64		@ done yet?
#ifdef __thumb2__
	itete	lo
#endif
	addlo	r12,sp,#4*(0)		@ shortcut or ...
	ldrhs	r12,[sp,#4*(32+1)]	@ ... load inp
	addlo	r14,sp,#4*(0)		@ shortcut or ...
	ldrhs	r14,[sp,#4*(32+0)]	@ ... load out

	ldr	@t[0],[sp,#4*(0)]	@ load key material
	ldr	@t[1],[sp,#4*(1)]

#if __ARM_ARCH__>=6 || !defined(__ARMEB__)
# if __ARM_ARCH__<7
	orr	@t[2],r12,r14
	tst	@t[2],#3		@ are input and output aligned?
	ldr	@t[2],[sp,#4*(2)]
	bne	.Lunaligned
	cmp	@t[3],#64		@ restore flags
# else
	ldr	@t[2],[sp,#4*(2)]
# endif
	ldr	@t[3],[sp,#4*(3)]

	add	@x[0],@x[0],@t[0]	@ accumulate key material
	add	@x[1],@x[1],@t[1]
# ifdef __thumb2__
	itt	hs
# endif
	ldrhs	@t[0],[r12],#16		@ load input
	ldrhs	@t[1],[r12,#-12]

	add	@x[2],@x[2],@t[2]
	add	@x[3],@x[3],@t[3]
# ifdef __thumb2__
	itt	hs
# endif
	ldrhs	@t[2],[r12,#-8]
	ldrhs	@t[3],[r12,#-4]
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
	rev	@x[0],@x[0]
	rev	@x[1],@x[1]
	rev	@x[2],@x[2]
	rev	@x[3],@x[3]
# endif
# ifdef __thumb2__
	itt	hs
# endif
	eorhs	@x[0],@x[0],@t[0]	@ xor with input
	eorhs	@x[1],@x[1],@t[1]
	add	@t[0],sp,#4*(4)
	str	@x[0],[r14],#16		@ store output
# ifdef __thumb2__
	itt	hs
# endif
	eorhs	@x[2],@x[2],@t[2]
	eorhs	@x[3],@x[3],@t[3]
	ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
	str	@x[1],[r14,#-12]
	str	@x[2],[r14,#-8]
	str	@x[3],[r14,#-4]

	add	@x[4],@x[4],@t[0]	@ accumulate key material
	add	@x[5],@x[5],@t[1]
# ifdef __thumb2__
	itt	hs
# endif
	ldrhs	@t[0],[r12],#16		@ load input
	ldrhs	@t[1],[r12,#-12]
	add	@x[6],@x[6],@t[2]
	add	@x[7],@x[7],@t[3]
# ifdef __thumb2__
	itt	hs
# endif
	ldrhs	@t[2],[r12,#-8]
	ldrhs	@t[3],[r12,#-4]
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
	rev	@x[4],@x[4]
	rev	@x[5],@x[5]
	rev	@x[6],@x[6]
	rev	@x[7],@x[7]
# endif
# ifdef __thumb2__
	itt	hs
# endif
	eorhs	@x[4],@x[4],@t[0]
	eorhs	@x[5],@x[5],@t[1]
	add	@t[0],sp,#4*(8)
	str	@x[4],[r14],#16		@ store output
# ifdef __thumb2__
	itt	hs
# endif
	eorhs	@x[6],@x[6],@t[2]
	eorhs	@x[7],@x[7],@t[3]
	str	@x[5],[r14,#-12]
	ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
	str	@x[6],[r14,#-8]
	add	@x[0],sp,#4*(16+8)
	str	@x[7],[r14,#-4]

	ldmia	@x[0],{@x[0]-@x[7]}	@ load second half

	add	@x[0],@x[0],@t[0]	@ accumulate key material
	add	@x[1],@x[1],@t[1]
# ifdef __thumb2__
	itt	hs
# endif
	ldrhs	@t[0],[r12],#16		@ load input
	ldrhs	@t[1],[r12,#-12]
# ifdef __thumb2__
	itt	hi
# endif
	strhi	@t[2],[sp,#4*(16+10)]	@ copy "@x[10]" while at it
	strhi	@t[3],[sp,#4*(16+11)]	@ copy "@x[11]" while at it
	add	@x[2],@x[2],@t[2]
	add	@x[3],@x[3],@t[3]
# ifdef __thumb2__
	itt	hs
# endif
	ldrhs	@t[2],[r12,#-8]
	ldrhs	@t[3],[r12,#-4]
# if __ARM_ARCH__>=6 && defined(__ARMEB__)
	rev	@x[0],@x[0]
	rev	@x[1],@x[1]
	rev	@x[2],@x[2]
	rev	@x[3],@x[3]
# endif
# ifdef __thumb2__
	itt	hs
# endif
	eorhs	@x[0],@x[0],@t[0]
	eorhs	@x[1],@x[1],@t[1]
	add	@t[0],sp,#4*(12)
	str	@x[0],[r14],#16		@ store output
# ifdef __thumb2__
	itt	hs
# endif
	eorhs	@x[2],@x[2],@t[2]
	eorhs	@x[3],@x[3],@t[3]
	str	@x[1],[r14,#-12]
	ldmia	@t[0],{@t[0]-@t[3]}	@ load key material
	str	@x[2],[r14,#-8]
	str	@x[3],[r14,#-4]

	add	@x[4],@x[4],@t[0]	@ accumulate key material
	add	@x[5],@x[5],@t[1]
# ifdef __thumb2__
	itt	hi
# endif
	addhi	@t[0],@t[0],#1		@ next counter value
	strhi	@t[0],[sp,#4*(12)]	@ save next counter value
# ifdef __thumb2__
	itt	hs
# endif
	ldrhs	@t[0],[r12],#16		@ load input
	ldrhs	@t[1],[r12,#-12]
	add	@x[6],@x[6],@t[2]
	add	@x[7],@x[7],@t[3]
# ifdef __thumb2__
	itt	hs
4414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhs @t[2],[r12,#-8] 4434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhs @t[3],[r12,#-4] 4444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__>=6 && defined(__ARMEB__) 4454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[4],@x[4] 4464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[5],@x[5] 4474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[6],@x[6] 4484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[7],@x[7] 4494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 4514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 4524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorhs @x[4],@x[4],@t[0] 4544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorhs @x[5],@x[5],@t[1] 4554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 4564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin it ne 4574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrne @t[0],[sp,#4*(32+2)] @ re-load len 4594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 4604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 4614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorhs @x[6],@x[6],@t[2] 4634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorhs @x[7],@x[7],@t[3] 4644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[4],[r14],#16 @ store output 4654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[5],[r14,#-12] 4664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 
4674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin it hs 4684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin subhs @t[3],@t[0],#64 @ len-=64 4704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[6],[r14,#-8] 4714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[7],[r14,#-4] 4724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bhi .Loop_outer 4734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 4744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin beq .Ldone 4754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# if __ARM_ARCH__<7 4764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin b .Ltail 4774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 4784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 4 4794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Lunaligned: @ unaligned endian-neutral path 4804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin cmp @t[3],#64 @ restore flags 4814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#endif 4834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#if __ARM_ARCH__<7 4844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[3],[sp,#4*(3)] 4854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 4864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminfor ($i=0;$i<16;$i+=4) { 4874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminmy $j=$i&0x7; 4884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 4894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___ if ($i==4); 4904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[0],sp,#4*(16+8) 4914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 4924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___ if ($i==8); 4934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @x[0],{@x[0]-@x[7]} @ load second half 
4944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 4954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hi 4964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 4974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strhi @t[2],[sp,#4*(16+10)] @ copy "@x[10]" 4984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strhi @t[3],[sp,#4*(16+11)] @ copy "@x[11]" 4994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 5004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___; 5014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[$j+0],@x[$j+0],@t[0] @ accumulate key material 5024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 5034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___ if ($i==12); 5044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hi 5064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin addhi @t[0],@t[0],#1 @ next counter value 5084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strhi @t[0],[sp,#4*(12)] @ save next counter value 5094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 5104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___; 5114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[$j+1],@x[$j+1],@t[1] 5124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[$j+2],@x[$j+2],@t[2] 5134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itete lo 5154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorlo @t[0],@t[0],@t[0] @ zero or ... 5174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[0],[r12],#16 @ ... 
load input 5184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorlo @t[1],@t[1],@t[1] 5194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[1],[r12,#-12] 5204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 5214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[$j+3],@x[$j+3],@t[3] 5224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itete lo 5244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorlo @t[2],@t[2],@t[2] 5264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[2],[r12,#-8] 5274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eorlo @t[3],@t[3],@t[3] 5284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[3],[r12,#-4] 5294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 5304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+0],@t[0],@x[$j+0] @ xor with input (or zero) 5314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+1],@t[1],@x[$j+1] 5324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 5344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[0],[r12,#-15] @ load more input 5364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[1],[r12,#-11] 5374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+2],@t[2],@x[$j+2] 5384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+0],[r14],#16 @ store output 5394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+3],@t[3],@x[$j+3] 5404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 5424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 
5434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[2],[r12,#-7] 5444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[3],[r12,#-3] 5454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+1],[r14,#-12] 5464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+0],@t[0],@x[$j+0],lsr#8 5474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+2],[r14,#-8] 5484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+1],@t[1],@x[$j+1],lsr#8 5494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 5514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[0],[r12,#-14] @ load more input 5534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[1],[r12,#-10] 5544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+3],[r14,#-4] 5554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+2],@t[2],@x[$j+2],lsr#8 5564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+0],[r14,#-15] 5574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+3],@t[3],@x[$j+3],lsr#8 5584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 5604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[2],[r12,#-6] 5624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[3],[r12,#-2] 5634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+1],[r14,#-11] 5644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+0],@t[0],@x[$j+0],lsr#8 5654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+2],[r14,#-7] 5664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+1],@t[1],@x[$j+1],lsr#8 
5674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 5694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[0],[r12,#-13] @ load more input 5714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[1],[r12,#-9] 5724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+3],[r14,#-3] 5734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+2],@t[2],@x[$j+2],lsr#8 5744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+0],[r14,#-14] 5754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+3],@t[3],@x[$j+3],lsr#8 5764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 5774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin itt hs 5784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 5794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[2],[r12,#-5] 5804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhsb @t[3],[r12,#-1] 5814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+1],[r14,#-10] 5824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+2],[r14,#-6] 5834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+0],@t[0],@x[$j+0],lsr#8 5844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+3],[r14,#-2] 5854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+1],@t[1],@x[$j+1],lsr#8 5864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+0],[r14,#-13] 5874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+2],@t[2],@x[$j+2],lsr#8 5884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+1],[r14,#-9] 5894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[$j+3],@t[3],@x[$j+3],lsr#8 5904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+2],[r14,#-5] 
5914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @x[$j+3],[r14,#-1] 5924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 5934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___ if ($i<12); 5944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(4+$i) 5954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @t[0],{@t[0]-@t[3]} @ load key material 5964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 5974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin} 5984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___; 5994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 6004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin it ne 6014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 6024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrne @t[0],[sp,#4*(32+2)] @ re-load len 6034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 6044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin it hs 6054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 6064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin subhs @t[3],@t[0],#64 @ len-=64 6074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bhi .Loop_outer 6084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin beq .Ldone 6104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#endif 6114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ltail: 6134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr r12,[sp,#4*(32+1)] @ load inp 6144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[1],sp,#4*(0) 6154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr r14,[sp,#4*(32+0)] @ load out 6164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_tail: 
6184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrb @t[2],[@t[1]],#1 @ read buffer on stack 6194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrb @t[3],[r12],#1 @ read input 6204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin subs @t[0],@t[0],#1 6214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @t[3],@t[3],@t[2] 6224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strb @t[3],[r14],#1 @ store output 6234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bne .Loop_tail 6244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ldone: 6264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add sp,sp,#4*(32+3) 6274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Lno_data: 6284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia sp!,{r4-r11,pc} 6294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.size ChaCha20_ctr32,.-ChaCha20_ctr32 6304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 6314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin{{{ 6334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminmy ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$t0,$t1,$t2,$t3) = 6344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin map("q$_",(0..15)); 6354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminsub NEONROUND { 6374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminmy $odd = pop; 6384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminmy ($a,$b,$c,$d,$t)=@_; 6394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ( 6414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vadd_i32 ($a,$a,$b)", 6424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&veor ($d,$d,$a)", 6434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vrev32_16 ($d,$d)", # vrot 
($d,16) 6444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vadd_i32 ($c,$c,$d)", 6464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&veor ($t,$b,$c)", 6474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vshr_u32 ($b,$t,20)", 6484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vsli_32 ($b,$t,12)", 6494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vadd_i32 ($a,$a,$b)", 6514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&veor ($t,$d,$a)", 6524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vshr_u32 ($d,$t,24)", 6534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vsli_32 ($d,$t,8)", 6544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vadd_i32 ($c,$c,$d)", 6564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&veor ($t,$b,$c)", 6574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vshr_u32 ($b,$t,25)", 6584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vsli_32 ($b,$t,7)", 6594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vext_8 ($c,$c,$c,8)", 6614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vext_8 ($b,$b,$b,$odd?12:4)", 6624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin "&vext_8 ($d,$d,$d,$odd?4:12)" 6634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ); 6644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin} 6654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin$code.=<<___; 6674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#if __ARM_MAX_ARCH__>=7 6684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.arch armv7-a 6694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.fpu neon 6704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 
6714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.type ChaCha20_neon,%function 6724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 5 6734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid BenjaminChaCha20_neon: 6744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr r12,[sp,#0] @ pull pointer to counter and nonce 6754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin stmdb sp!,{r0-r2,r4-r11,lr} 6764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.LChaCha20_neon: 6774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin adr r14,.Lsigma 6784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vstmdb sp!,{d8-d15} @ ABI spec says so 6794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin stmdb sp!,{r0-r3} 6804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.32 {$b0-$c0},[r3] @ load key 6824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia r3,{r4-r11} @ load key 6834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin sub sp,sp,#4*(16+16) 6854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.32 {$d0},[r12] @ load counter and nonce 6864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add r12,sp,#4*8 6874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia r14,{r0-r3} @ load sigma 6884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.32 {$a0},[r14]! 
@ load sigma 6894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.32 {$t0},[r14] @ one 6904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.32 {$c0-$d0},[r12] @ copy 1/2key|counter|nonce 6914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.32 {$a0-$b0},[sp] @ copy sigma|1/2key 6924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 6934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str r10,[sp,#4*(16+10)] @ off-load "@x[10]" 6944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str r11,[sp,#4*(16+11)] @ off-load "@x[11]" 6954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vshl.i32 $t1#lo,$t0#lo,#1 @ two 6964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vstr $t0#lo,[sp,#4*(16+0)] 6974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vshl.i32 $t2#lo,$t0#lo,#2 @ four 6984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vstr $t1#lo,[sp,#4*(16+2)] 6994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vmov $a1,$a0 7004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vstr $t2#lo,[sp,#4*(16+4)] 7014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vmov $a2,$a0 7024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vmov $b1,$b0 7034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vmov $b2,$b0 7044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin b .Loop_neon_enter 7054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 7064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 4 7074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_neon_outer: 7084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia sp,{r0-r9} @ load key material 7094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin cmp @t[3],#64*2 @ if len<=64*2 7104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bls .Lbreak_neon @ switch to integer-only 7114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vmov $a1,$a0 7124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str 
@t[3],[sp,#4*(32+2)]		@ save len
	vmov		$a2,$a0
	str		r12, [sp,#4*(32+1)]	@ save inp
	vmov		$b1,$b0
	str		r14, [sp,#4*(32+0)]	@ save out
	vmov		$b2,$b0
.Loop_neon_enter:
	ldr		@t[3], [sp,#4*(15)]
	vadd.i32	$d1,$d0,$t0		@ counter+1
	ldr		@x[12],[sp,#4*(12)]	@ modulo-scheduled load
	vmov		$c1,$c0
	ldr		@t[2], [sp,#4*(13)]
	vmov		$c2,$c0
	ldr		@x[14],[sp,#4*(14)]
	vadd.i32	$d2,$d1,$t0		@ counter+2
	str		@t[3], [sp,#4*(16+15)]
	mov		@t[3],#10
	add		@x[12],@x[12],#3	@ counter+3
	b		.Loop_neon

.align	4
.Loop_neon:
	subs		@t[3],@t[3],#1
___
	# Generate the body of .Loop_neon.  Four instruction lists are built:
	# three from NEONROUND, one per NEON register set ($a0..$d0/$t0,
	# $a1..$d1/$t1, $a2..$d2/$t2), and one from ROUND for the integer
	# registers.  This appears to be the "3xNEON+1xIALU" arrangement named
	# in the benchmark table in the file header.
	#
	# Every list element is passed to string-eval.  NEONROUND and ROUND are
	# defined outside this chunk; presumably each element is a snippet
	# that appends one instruction to $code -- confirm against those subs.
	#
	# The loop counter is set to #10 before entry and decremented by the
	# "subs" emitted just above; the matching "bne .Loop_neon" is emitted
	# right after this Perl section.
	# NOTE(review): that implies none of the eval'd instructions in
	# between may alter the condition flags -- confirm.
	my @thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,0);
	my @thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,0);
	my @thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,0);
	my @thread3=&ROUND(0,4,8,12);

	# Interleave the streams.  For each @thread0 element ($_ , run by the
	# bare "eval;") one element of @thread1 and one of @thread2 are
	# consumed, and an element of @thread3 is consumed after each of the
	# three -- i.e. three @thread3 entries per @thread0 entry.  The
	# evaluation order here is the emission order, so do not reorder.
	foreach (@thread0) {
		eval;			eval(shift(@thread3));
		eval(shift(@thread1));	eval(shift(@thread3));
		eval(shift(@thread2));	eval(shift(@thread3));
	}

	# Second pass of the same iteration: the last NEONROUND argument is 1
	# instead of 0 and ROUND gets indices (0,5,10,15) instead of
	# (0,4,8,12) -- presumably the diagonal half of the ChaCha double
	# round; confirm against the NEONROUND/ROUND definitions.
	@thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,1);
	@thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,1);
	@thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,1);
	@thread3=&ROUND(0,5,10,15);

	# Same interleave pattern as the first pass.
	foreach (@thread0) {
		eval;			eval(shift(@thread3));
		eval(shift(@thread1));	eval(shift(@thread3));
		eval(shift(@thread2));	eval(shift(@thread3));
	}
$code.=<<___;
	bne		.Loop_neon

	add		@t[3],sp,#32
	vld1.32		{$t0-$t1},[sp]		@ load key material
	vld1.32		{$t2-$t3},[@t[3]]

	ldr		@t[3],[sp,#4*(32+2)]	@ load len

	str		@t[0], [sp,#4*(16+8)]	@ modulo-scheduled store
	str		@t[1], [sp,#4*(16+9)]
	str		@x[12],[sp,#4*(16+12)]
	str		@t[2], [sp,#4*(16+13)]
	str		@x[14],[sp,#4*(16+14)]

	@ at this point we have first half of 512-bit result in
	@ @x[0-7] and second half at sp+4*(16+8)

	ldr		r12,[sp,#4*(32+1)]	@ load inp
	ldr		r14,[sp,#4*(32+0)]	@ load out

	vadd.i32	$a0,$a0,$t0		@ accumulate key material
	vadd.i32	$a1,$a1,$t0
	vadd.i32	$a2,$a2,$t0
	vldr		$t0#lo,[sp,#4*(16+0)]	@ one

	vadd.i32	$b0,$b0,$t1
	vadd.i32	$b1,$b1,$t1
vadd.i32 $b2,$b2,$t1 7864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vldr $t1#lo,[sp,#4*(16+2)] @ two 7874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 7884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $c0,$c0,$t2 7894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $c1,$c1,$t2 7904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $c2,$c2,$t2 7914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $d1#lo,$d1#lo,$t0#lo @ counter+1 7924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $d2#lo,$d2#lo,$t1#lo @ counter+2 7934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 7944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $d0,$d0,$t3 7954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $d1,$d1,$t3 7964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $d2,$d2,$t3 7974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 7984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin cmp @t[3],#64*4 7994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin blo .Ltail_neon 8004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! @ load input 8024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin mov @t[3],sp 8034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 8044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a0,$a0,$t0 @ xor with input 8054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b0,$b0,$t1 8064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 8074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c0,$c0,$t2 8084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d0,$d0,$t3 8094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 
8104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a1,$a1,$t0 8124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a0-$b0},[r14]! @ store output 8134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b1,$b1,$t1 8144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 8154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c1,$c1,$t2 8164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c0-$d0},[r14]! 8174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d1,$d1,$t3 8184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 8194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a2,$a2,$t0 8214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.32 {$a0-$b0},[@t[3]]! @ load for next iteration 8224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $t0#hi,$t0#hi,$t0#hi 8234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vldr $t0#lo,[sp,#4*(16+4)] @ four 8244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b2,$b2,$t1 8254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.32 {$c0-$d0},[@t[3]] 8264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c2,$c2,$t2 8274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a1-$b1},[r14]! 8284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d2,$d2,$t3 8294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c1-$d1},[r14]! 
8304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vadd.i32 $d0#lo,$d0#lo,$t0#lo @ next counter value 8324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vldr $t0#lo,[sp,#4*(16+0)] @ one 8334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia sp,{@t[0]-@t[3]} @ load key material 8354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[0],@x[0],@t[0] @ accumulate key material 8364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[0],[r12],#16 @ load input 8374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a2-$b2},[r14]! 8384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[1],@x[1],@t[1] 8394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[1],[r12,#-12] 8404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c2-$d2},[r14]! 8414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[2],@x[2],@t[2] 8424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[2],[r12,#-8] 8434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[3],@x[3],@t[3] 8444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[3],[r12,#-4] 8454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __ARMEB__ 8464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[0],@x[0] 8474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[1],@x[1] 8484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[2],@x[2] 8494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[3],@x[3] 8504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 8514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[0],@x[0],@t[0] @ xor with input 8524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(4) 8534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[1],@x[1],@t[1] 8544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid 
Benjamin str @x[0],[r14],#16 @ store output 8554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[2],@x[2],@t[2] 8564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[1],[r14,#-12] 8574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[3],@x[3],@t[3] 8584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @t[0],{@t[0]-@t[3]} @ load key material 8594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[2],[r14,#-8] 8604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[3],[r14,#-4] 8614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[4],@x[4],@t[0] @ accumulate key material 8634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[0],[r12],#16 @ load input 8644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[5],@x[5],@t[1] 8654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[1],[r12,#-12] 8664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[6],@x[6],@t[2] 8674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[2],[r12,#-8] 8684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[7],@x[7],@t[3] 8694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[3],[r12,#-4] 8704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __ARMEB__ 8714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[4],@x[4] 8724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[5],@x[5] 8734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[6],@x[6] 8744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[7],@x[7] 8754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 8764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[4],@x[4],@t[0] 8774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(8) 8784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[5],@x[5],@t[1] 8794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid 
Benjamin str @x[4],[r14],#16 @ store output 8804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[6],@x[6],@t[2] 8814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[5],[r14,#-12] 8824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[7],@x[7],@t[3] 8834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @t[0],{@t[0]-@t[3]} @ load key material 8844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[6],[r14,#-8] 8854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[0],sp,#4*(16+8) 8864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[7],[r14,#-4] 8874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @x[0],{@x[0]-@x[7]} @ load second half 8894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 8904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[0],@x[0],@t[0] @ accumulate key material 8914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[0],[r12],#16 @ load input 8924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[1],@x[1],@t[1] 8934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[1],[r12,#-12] 8944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 8954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin it hi 8964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 8974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strhi @t[2],[sp,#4*(16+10)] @ copy "@x[10]" while at it 8984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[2],@x[2],@t[2] 8994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[2],[r12,#-8] 9004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 9014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin it hi 9024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 9034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin strhi @t[3],[sp,#4*(16+11)] @ copy "@x[11]" while at it 
9044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[3],@x[3],@t[3] 9054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[3],[r12,#-4] 9064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __ARMEB__ 9074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[0],@x[0] 9084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[1],@x[1] 9094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[2],@x[2] 9104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[3],@x[3] 9114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 9124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[0],@x[0],@t[0] 9134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(12) 9144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[1],@x[1],@t[1] 9154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[0],[r14],#16 @ store output 9164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[2],@x[2],@t[2] 9174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[1],[r14,#-12] 9184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[3],@x[3],@t[3] 9194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @t[0],{@t[0]-@t[3]} @ load key material 9204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[2],[r14,#-8] 9214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[3],[r14,#-4] 9224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[4],@x[4],@t[0] @ accumulate key material 9244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],@t[0],#4 @ next counter value 9254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[5],@x[5],@t[1] 9264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @t[0],[sp,#4*(12)] @ save next counter value 9274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[0],[r12],#16 @ load input 
9284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[6],@x[6],@t[2] 9294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[4],@x[4],#3 @ counter+3 9304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[1],[r12,#-12] 9314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[7],@x[7],@t[3] 9324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[2],[r12,#-8] 9334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[3],[r12,#-4] 9344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __ARMEB__ 9354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[4],@x[4] 9364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[5],@x[5] 9374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[6],@x[6] 9384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[7],@x[7] 9394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 9404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[4],@x[4],@t[0] 9414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __thumb2__ 9424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin it hi 9434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 9444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrhi @t[0],[sp,#4*(32+2)] @ re-load len 9454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[5],@x[5],@t[1] 9464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[6],@x[6],@t[2] 9474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[4],[r14],#16 @ store output 9484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @x[7],@x[7],@t[3] 9494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[5],[r14,#-12] 9504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin sub @t[3],@t[0],#64*4 @ len-=64*4 9514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[6],[r14,#-8] 9524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[7],[r14,#-4] 
9534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bhi .Loop_neon_outer 9544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin b .Ldone_neon 9564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 4 9584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Lbreak_neon: 9594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin @ harmonize NEON and integer-only stack frames: load data 9604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin @ from NEON frame, but save to integer-only one; distance 9614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin @ between the two is 4*(32+4+16-32)=4*(20). 9624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @t[3], [sp,#4*(20+32+2)] @ save len 9644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[3],sp,#4*(32+4) 9654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str r12, [sp,#4*(20+32+1)] @ save inp 9664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str r14, [sp,#4*(20+32+0)] @ save out 9674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @x[12],[sp,#4*(16+10)] 9694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @x[14],[sp,#4*(16+11)] 9704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vldmia @t[3],{d8-d15} @ fulfill ABI requirement 9714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[12],[sp,#4*(20+16+10)] @ copy "@x[10]" 9724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @x[14],[sp,#4*(20+16+11)] @ copy "@x[11]" 9734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[3], [sp,#4*(15)] 9754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @x[12],[sp,#4*(12)] @ modulo-scheduled load 
9764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[2], [sp,#4*(13)] 9774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @x[14],[sp,#4*(14)] 9784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin str @t[3], [sp,#4*(20+16+15)] 9794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[3],sp,#4*(20) 9804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.32 {$a0-$b0},[@t[3]]! @ copy key 9814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add sp,sp,#4*(20) @ switch frame 9824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.32 {$c0-$d0},[@t[3]] 9834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin mov @t[3],#10 9844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin b .Loop @ go integer-only 9854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 4 9874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ltail_neon: 9884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin cmp @t[3],#64*3 9894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bhs .L192_or_more_neon 9904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin cmp @t[3],#64*2 9914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bhs .L128_or_more_neon 9924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin cmp @t[3],#64*1 9934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bhs .L64_or_more_neon 9944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 9954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(8) 9964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a0-$b0},[sp] 9974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[2],sp,#4*(0) 9984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c0-$d0},[@t[0]] 9994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin b .Loop_tail_neon 10004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 4 
10024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.L64_or_more_neon: 10034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 10044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 10054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a0,$a0,$t0 10064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b0,$b0,$t1 10074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c0,$c0,$t2 10084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d0,$d0,$t3 10094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a0-$b0},[r14]! 10104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c0-$d0},[r14]! 10114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin beq .Ldone_neon 10134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(8) 10154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a1-$b1},[sp] 10164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[2],sp,#4*(0) 10174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c1-$d1},[@t[0]] 10184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin sub @t[3],@t[3],#64*1 @ len-=64*1 10194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin b .Loop_tail_neon 10204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 4 10224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.L128_or_more_neon: 10234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 10244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 
10254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a0,$a0,$t0 10264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b0,$b0,$t1 10274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 10284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c0,$c0,$t2 10294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d0,$d0,$t3 10304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 10314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a1,$a1,$t0 10334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b1,$b1,$t1 10344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a0-$b0},[r14]! 10354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c1,$c1,$t2 10364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c0-$d0},[r14]! 10374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d1,$d1,$t3 10384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a1-$b1},[r14]! 10394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c1-$d1},[r14]! 
10404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin beq .Ldone_neon 10424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10434969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(8) 10444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a2-$b2},[sp] 10454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[2],sp,#4*(0) 10464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c2-$d2},[@t[0]] 10474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin sub @t[3],@t[3],#64*2 @ len-=64*2 10484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin b .Loop_tail_neon 10494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.align 4 10514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.L192_or_more_neon: 10524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 10534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 10544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a0,$a0,$t0 10554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b0,$b0,$t1 10564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 10574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c0,$c0,$t2 10584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d0,$d0,$t3 10594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 10604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a1,$a1,$t0 10624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b1,$b1,$t1 10634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t0-$t1},[r12]! 10644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c1,$c1,$t2 10654969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a0-$b0},[r14]! 
10664969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d1,$d1,$t3 10674969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vld1.8 {$t2-$t3},[r12]! 10684969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10694969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $a2,$a2,$t0 10704969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c0-$d0},[r14]! 10714969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $b2,$b2,$t1 10724969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a1-$b1},[r14]! 10734969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $c2,$c2,$t2 10744969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c1-$d1},[r14]! 10754969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin veor $d2,$d2,$t3 10764969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$a2-$b2},[r14]! 10774969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vst1.8 {$c2-$d2},[r14]! 10784969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10794969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin beq .Ldone_neon 10804969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10814969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia sp,{@t[0]-@t[3]} @ load key material 10824969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[0],@x[0],@t[0] @ accumulate key material 10834969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(4) 10844969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[1],@x[1],@t[1] 10854969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[2],@x[2],@t[2] 10864969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[3],@x[3],@t[3] 10874969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @t[0],{@t[0]-@t[3]} @ load key material 10884969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 10894969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[4],@x[4],@t[0] @ accumulate key material 10904969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add 
@t[0],sp,#4*(8) 10914969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[5],@x[5],@t[1] 10924969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[6],@x[6],@t[2] 10934969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[7],@x[7],@t[3] 10944969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @t[0],{@t[0]-@t[3]} @ load key material 10954969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __ARMEB__ 10964969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[0],@x[0] 10974969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[1],@x[1] 10984969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[2],@x[2] 10994969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[3],@x[3] 11004969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[4],@x[4] 11014969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[5],@x[5] 11024969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[6],@x[6] 11034969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[7],@x[7] 11044969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 11054969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin stmia sp,{@x[0]-@x[7]} 11064969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[0],sp,#4*(16+8) 11074969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11084969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia @x[0],{@x[0]-@x[7]} @ load second half 11094969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11104969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[0],@x[0],@t[0] @ accumulate key material 11114969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(12) 11124969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[1],@x[1],@t[1] 11134969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[2],@x[2],@t[2] 11144969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[3],@x[3],@t[3] 11154969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia 
@t[0],{@t[0]-@t[3]} @ load key material 11164969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11174969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[4],@x[4],@t[0] @ accumulate key material 11184969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[0],sp,#4*(8) 11194969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[5],@x[5],@t[1] 11204969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[4],@x[4],#3 @ counter+3 11214969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[6],@x[6],@t[2] 11224969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @x[7],@x[7],@t[3] 11234969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldr @t[3],[sp,#4*(32+2)] @ re-load len 11244969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# ifdef __ARMEB__ 11254969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[0],@x[0] 11264969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[1],@x[1] 11274969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[2],@x[2] 11284969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[3],@x[3] 11294969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[4],@x[4] 11304969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[5],@x[5] 11314969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[6],@x[6] 11324969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin rev @x[7],@x[7] 11334969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin# endif 11344969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin stmia @t[0],{@x[0]-@x[7]} 11354969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add @t[2],sp,#4*(0) 11364969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin sub @t[3],@t[3],#64*3 @ len-=64*3 11374969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11384969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Loop_tail_neon: 11394969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrb @t[0],[@t[2]],#1 @ read buffer on stack 
11404969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldrb @t[1],[r12],#1 @ read input 11414969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin subs @t[3],@t[3],#1 11424969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin eor @t[0],@t[0],@t[1] 1143a94fe0531b3c196ad078174259af2201b2e3a246Robert Sloan strb @t[0],[r14],#1 @ store output 11444969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin bne .Loop_tail_neon 11454969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11464969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.Ldone_neon: 11474969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add sp,sp,#4*(32+4) 11484969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin vldmia sp,{d8-d15} 11494969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin add sp,sp,#4*(16+3) 11504969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin ldmia sp!,{r4-r11,pc} 11514969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.size ChaCha20_neon,.-ChaCha20_neon 11524969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin.comm OPENSSL_armcap_P,4,4 11534969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin#endif 11544969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin___ 11554969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin}}} 11564969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11574969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminforeach (split("\n",$code)) { 11584969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin s/\`([^\`]*)\`/eval $1/geo; 11594969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11604969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo; 11614969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin 11624969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin print $_,"\n"; 11634969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjamin} 11644969cc9b0ab2905ec478277f50ed3849b37a6c6bDavid Benjaminclose STDOUT; 1165