#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# January 2007.

# Montgomery multiplication for ARMv4.
#
# Performance improvement naturally varies among CPU implementations
# and compilers. The code was observed to provide +65-35% improvement
# [depending on key length, less for longer keys] on ARM920T, and
# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
# base and compiler generated code with in-lined umull and even umlal
# instructions. The latter means that this code didn't really have an
# "advantage" of utilizing some "secret" instruction.
#
# The code is interoperable with Thumb ISA and is rather compact, less
# than 1/2KB. Windows CE port would be trivial, as it's exclusively
# about decorations, ABI and instruction syntax are identical.

# November 2013
#
# Add NEON code path, which handles lengths divisible by 8. RSA/DSA
# performance improvement on Cortex-A8 is ~45-100% depending on key
# length, more for longer keys. On Cortex-A15 the span is ~10-105%.
# On Snapdragon S4 improvement was measured to vary from ~70% to
# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is
# rather because original integer-only code seems to perform
# suboptimally on S4.
# Situation on Cortex-A9 is unfortunately
# different. It's being looked into, but the trouble is that
# performance for vectors longer than 256 bits is actually a couple
# of percent worse than for integer-only code. The code is chosen
# for execution on all NEON-capable processors, because gain on
# others outweighs the marginal loss on Cortex-A9.

# Select the output file: consume command-line arguments until one looks
# like a plain "name.ext" file name (word characters and dashes only, no
# directory part). Arguments in front of it are discarded.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# If no argument qualified, $output is false and the generated code simply
# goes to the inherited STDOUT. When a file name was given, failing to open
# it must be fatal instead of silently writing somewhere else.
if (defined($output) && length($output)) {
    open STDOUT,'>',$output or die "can't open $output: $!";
}

# Symbolic names for the ARM registers used by the integer-only code path.
# Several names deliberately share r2 ($bp, $bi, $rp).
$num="r0";	# starts as num argument, but holds &tp[num-1]
$ap="r1";
$bp="r2"; $bi="r2"; $rp="r2";
$np="r3";
$tp="r4";
$aj="r5";
$nj="r6";
$tj="r7";
$n0="r8";
###########	# r9 is reserved by ELF as platform specific, e.g. TLS pointer
$alo="r10";	# sl, gcc uses it to keep @GOT
$ahi="r11";	# fp
$nlo="r12";	# ip
###########	# r13 is stack pointer
$nhi="r14";	# lr
###########	# r15 is program counter

#### argument block layout relative to &tp[num-1], a.k.a. $num
# Above tp[num-1] sit tp[num] and the ten registers saved by the prologue,
# which puts the {r0,r2} pair pushed on entry at word offsets 12 and 13 and
# the caller's stack arguments at 14 and 15.
$_rp="$num,#12*4";
# ap permanently resides in r1
$_bp="$num,#13*4";
# np permanently resides in r3
$_n0="$num,#14*4";
$_num="$num,#15*4";	$_bpend=$_num;	# num's slot is reused to hold &bp[num]

# Integer-only bn_mul_mont.
#   in:  r0=rp, r1=ap, r2=bp, r3=np, [sp]=&n0, [sp,#4]=num
#   out: r0=0 if num<2 (nothing is computed), r0=1 otherwise
# When built for __ARM_ARCH__>=7 the entry code first checks whether num is
# a multiple of 8 and bit 0 of OPENSSL_armcap_P is set; if both hold it
# undoes its own stack adjustment and branches to bn_mul8x_mont_neon.
# The text below is emitted as-is apart from variable interpolation.
$code=<<___;
#include "arm_arch.h"

.text
.code	32

#if __ARM_ARCH__>=7
.align	5
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-bn_mul_mont
#endif

.global	bn_mul_mont
.type	bn_mul_mont,%function

.align	5
bn_mul_mont:
	ldr	ip,[sp,#4]		@ load num
	stmdb	sp!,{r0,r2}		@ sp points at argument block
#if __ARM_ARCH__>=7
	tst	ip,#7
	bne	.Lialu
	adr	r0,bn_mul_mont
	ldr	r2,.LOPENSSL_armcap
	ldr	r0,[r0,r2]
	tst	r0,#1			@ NEON available?
	ldmia	sp, {r0,r2}
	beq	.Lialu
	add	sp,sp,#8
	b	bn_mul8x_mont_neon
.align	4
.Lialu:
#endif
	cmp	ip,#2
	mov	$num,ip			@ load num
	movlt	r0,#0
	addlt	sp,sp,#2*4
	blt	.Labrt

	stmdb	sp!,{r4-r12,lr}		@ save 10 registers

	mov	$num,$num,lsl#2		@ rescale $num for byte count
	sub	sp,sp,$num		@ alloca(4*num)
	sub	sp,sp,#4		@ +extra dword
	sub	$num,$num,#4		@ "num=num-1"
	add	$tp,$bp,$num		@ &bp[num-1]

	add	$num,sp,$num		@ $num to point at &tp[num-1]
	ldr	$n0,[$_n0]		@ &n0
	ldr	$bi,[$bp]		@ bp[0]
	ldr	$aj,[$ap],#4		@ ap[0],ap++
	ldr	$nj,[$np],#4		@ np[0],np++
	ldr	$n0,[$n0]		@ *n0
	str	$tp,[$_bpend]		@ save &bp[num]

	umull	$alo,$ahi,$aj,$bi	@ ap[0]*bp[0]
	str	$n0,[$_n0]		@ save n0 value
	mul	$n0,$alo,$n0		@ "tp[0]"*n0
	mov	$nlo,#0
	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"t[0]"
	mov	$tp,sp

.L1st:
	ldr	$aj,[$ap],#4		@ ap[j],ap++
	mov	$alo,$ahi
	ldr	$nj,[$np],#4		@ np[j],np++
	mov	$ahi,#0
	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[0]
	mov	$nhi,#0
	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
	adds	$nlo,$nlo,$alo
	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
	adc	$nlo,$nhi,#0
	cmp	$tp,$num
	bne	.L1st

	adds	$nlo,$nlo,$ahi
	ldr	$tp,[$_bp]		@ restore bp
	mov	$nhi,#0
	ldr	$n0,[$_n0]		@ restore n0
	adc	$nhi,$nhi,#0
	str	$nlo,[$num]		@ tp[num-1]=
	str	$nhi,[$num,#4]		@ tp[num]=

.Louter:
	sub	$tj,$num,sp		@ "original" $num-1 value
	sub	$ap,$ap,$tj		@ "rewind" ap to &ap[1]
	ldr	$bi,[$tp,#4]!		@ *(++bp)
	sub	$np,$np,$tj		@ "rewind" np to &np[1]
	ldr	$aj,[$ap,#-4]		@ ap[0]
	ldr	$alo,[sp]		@ tp[0]
	ldr	$nj,[$np,#-4]		@ np[0]
	ldr	$tj,[sp,#4]		@ tp[1]

	mov	$ahi,#0
	umlal	$alo,$ahi,$aj,$bi	@ ap[0]*bp[i]+tp[0]
	str	$tp,[$_bp]		@ save bp
	mul	$n0,$alo,$n0
	mov	$nlo,#0
	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"tp[0]"
	mov	$tp,sp

.Linner:
	ldr	$aj,[$ap],#4		@ ap[j],ap++
	adds	$alo,$ahi,$tj		@ +=tp[j]
	ldr	$nj,[$np],#4		@ np[j],np++
	mov	$ahi,#0
	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[i]
	mov	$nhi,#0
	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
	adc	$ahi,$ahi,#0
	ldr	$tj,[$tp,#8]		@ tp[j+1]
	adds	$nlo,$nlo,$alo
	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
	adc	$nlo,$nhi,#0
	cmp	$tp,$num
	bne	.Linner

	adds	$nlo,$nlo,$ahi
	mov	$nhi,#0
	ldr	$tp,[$_bp]		@ restore bp
	adc	$nhi,$nhi,#0
	ldr	$n0,[$_n0]		@ restore n0
	adds	$nlo,$nlo,$tj
	ldr	$tj,[$_bpend]		@ restore &bp[num]
	adc	$nhi,$nhi,#0
	str	$nlo,[$num]		@ tp[num-1]=
	str	$nhi,[$num,#4]		@ tp[num]=

	cmp	$tp,$tj
	bne	.Louter

	ldr	$rp,[$_rp]		@ pull rp
	add	$num,$num,#4		@ $num to point at &tp[num]
	sub	$aj,$num,sp		@ "original" num value
	mov	$tp,sp			@ "rewind" $tp
	mov	$ap,$tp			@ "borrow" $ap
	sub	$np,$np,$aj		@ "rewind" $np to &np[0]

	subs	$tj,$tj,$tj		@ "clear" carry flag
.Lsub:	ldr	$tj,[$tp],#4
	ldr	$nj,[$np],#4
	sbcs	$tj,$tj,$nj		@ tp[j]-np[j]
	str	$tj,[$rp],#4		@ rp[j]=
	teq	$tp,$num		@ preserve carry
	bne	.Lsub
	sbcs	$nhi,$nhi,#0		@ upmost carry
	mov	$tp,sp			@ "rewind" $tp
	sub	$rp,$rp,$aj		@ "rewind" $rp

	and	$ap,$tp,$nhi
	bic	$np,$rp,$nhi
	orr	$ap,$ap,$np		@ ap=borrow?tp:rp

.Lcopy:	ldr	$tj,[$ap],#4		@ copy or in-place refresh
	str	sp,[$tp],#4		@ zap tp
	str	$tj,[$rp],#4
	cmp	$tp,$num
	bne	.Lcopy

	add	sp,$num,#4		@ skip over tp[num+1]
	ldmia	sp!,{r4-r12,lr}		@ restore registers
	add	sp,sp,#2*4		@ skip over {r0,r2}
	mov	r0,#1
.Labrt:
#if __ARM_ARCH__>=5
	ret				@ bx lr
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	bn_mul_mont,.-bn_mul_mont
___
{
# Dlo/Dhi map the name of a NEON quad register "qN" to the name of its low
# ("d<2N>") or high ("d<2N+1>") double-word half; any argument that does not
# contain a q-register name yields the empty string.  They take exactly one
# argument, so they carry no prototype and work with or without a leading
# "&" at the call site.
sub Dlo { my $qreg=shift; return $qreg=~m|q([1]?[0-9])| ? "d".($1*2)   : ""; }
sub Dhi { my $qreg=shift; return $qreg=~m|q([1]?[0-9])| ? "d".($1*2+1) : ""; }

# NEON register names used by the 8x code path.
my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
my ($Z,$Temp)=("q4","q5");
my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13));
my ($Bi,$Ni,$M0)=map("d$_",(28..30));	# exactly one name per variable
my $zero=Dlo($Z);			# d8,  low half of q4
my $temp=Dlo($Temp);			# d10, low half of q5

# Integer register names; $n0 and $num shadow the file-level variables of
# the same name for the remainder of this block.
my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));

$code.=<<___;
#if __ARM_ARCH__>=7
.fpu	neon

.type	bn_mul8x_mont_neon,%function
.align	5
bn_mul8x_mont_neon:
	mov	ip,sp
	stmdb	sp!,{r4-r11}
2673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vstmdb sp!,{d8-d15} @ ABI specification says so 2683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia ip,{r4-r5} @ load rest of parameter block 2693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 2703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $toutptr,sp,#16 2713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {${Bi}[0]}, [$bptr,:32]! 2723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $toutptr,$toutptr,$num,lsl#4 2733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-( 2743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root and $toutptr,$toutptr,#-64 2753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {${M0}[0]}, [$n0,:32] 2763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov sp,$toutptr @ alloca 2773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $zero,$zero,$zero 2783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subs $inner,$num,#8 2793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 $Bi,$zero 2803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 2813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A0xB,$Bi,${A0}[0] 2823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A1xB,$Bi,${A0}[1] 2833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A2xB,$Bi,${A1}[0] 2843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshl.i64 $temp,`&Dhi("$A0xB")`,#16 2853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A3xB,$Bi,${A1}[1] 2863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 2873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 $temp,$temp,`&Dlo("$A0xB")` 2883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $zero,$zero,$zero 2893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmul.u32 $Ni,$temp,$M0 2903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 2913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A4xB,$Bi,${A2}[0] 
2923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$N0-$N3}, [$nptr]! 2933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A5xB,$Bi,${A2}[1] 2943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A6xB,$Bi,${A3}[0] 2953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 $Ni,$zero 2963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A7xB,$Bi,${A3}[1] 2973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 2983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_1st 2993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root @ special case for num=8, everything is in register bank... 3013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Ni,${N0}[0] 3033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $outer,$num,#1 3043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Ni,${N0}[1] 3053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Ni,${N1}[0] 3063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Ni,${N1}[1] 3073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Ni,${N2}[0] 3093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $Temp,$A0xB 3103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Ni,${N2}[1] 3113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A0xB,$A1xB 3123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Ni,${N3}[0] 3133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A1xB,$A2xB 3143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Ni,${N3}[1] 3153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A2xB,$A3xB 3163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A3xB,$A4xB 3173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 
3183f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A4xB,$A5xB 3193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A5xB,$A6xB 3203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 $temp,$temp,`&Dhi("$Temp")` 3213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A6xB,$A7xB 3223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $A7xB,$A7xB 3233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 3243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root b .LNEON_outer8 3263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align 4 3283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_outer8: 3293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {${Bi}[0]}, [$bptr,:32]! 3303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $zero,$zero,$zero 3313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 $Bi,$zero 3323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp 3333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Bi,${A0}[0] 3353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Bi,${A0}[1] 3363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Bi,${A1}[0] 3373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshl.i64 $temp,`&Dhi("$A0xB")`,#16 3383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Bi,${A1}[1] 3393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 $temp,$temp,`&Dlo("$A0xB")` 3413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $zero,$zero,$zero 3423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subs $outer,$outer,#1 3433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmul.u32 $Ni,$temp,$M0 3443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny 
Root 3453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Bi,${A2}[0] 3463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Bi,${A2}[1] 3473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Bi,${A3}[0] 3483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 $Ni,$zero 3493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Bi,${A3}[1] 3503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Ni,${N0}[0] 3523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Ni,${N0}[1] 3533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Ni,${N1}[0] 3543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Ni,${N1}[1] 3553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Ni,${N2}[0] 3573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $Temp,$A0xB 3583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Ni,${N2}[1] 3593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A0xB,$A1xB 3603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Ni,${N3}[0] 3613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A1xB,$A2xB 3623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Ni,${N3}[1] 3633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A2xB,$A3xB 3643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A3xB,$A4xB 3653f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 3663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A4xB,$A5xB 3673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A5xB,$A6xB 3683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 $temp,$temp,`&Dhi("$Temp")` 3693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmov $A6xB,$A7xB 3703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $A7xB,$A7xB 
3713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 3723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_outer8 3743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp 3763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $toutptr,sp 3773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A0xB")`,#16 3783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $inner,$num 3793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp 3803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $tinptr,sp,#16 3813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A0xB")`,#16 3823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` 3833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root b .LNEON_tail2 3853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align 4 3873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_1st: 3883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Ni,${N0}[0] 3893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$A0-$A3}, [$aptr]! 3903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Ni,${N0}[1] 3913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subs $inner,$inner,#8 3923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Ni,${N1}[0] 3933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Ni,${N1}[1] 3943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 3953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Ni,${N2}[0] 3963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$N0-$N1}, [$nptr]! 
3973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Ni,${N2}[1] 3983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! 3993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Ni,${N3}[0] 4003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Ni,${N3}[1] 4013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! 4023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A0xB,$Bi,${A0}[0] 4043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$N2-$N3}, [$nptr]! 4053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A1xB,$Bi,${A0}[1] 4063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! 4073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A2xB,$Bi,${A1}[0] 4083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A3xB,$Bi,${A1}[1] 4093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! 
4103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A4xB,$Bi,${A2}[0] 4123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A5xB,$Bi,${A2}[1] 4133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A6xB,$Bi,${A3}[0] 4143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmull.u32 $A7xB,$Bi,${A3}[1] 4153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_1st 4173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4183f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Ni,${N0}[0] 4193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $tinptr,sp,#16 4203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Ni,${N0}[1] 4213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr 4223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Ni,${N1}[0] 4233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$Temp}, [sp,:128] 4243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Ni,${N1}[1] 4253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $outer,$num,#1 4263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Ni,${N2}[0] 4283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! 4293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Ni,${N2}[1] 4303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 4313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A0xB}, [$tinptr, :128]! 4323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Ni,${N3}[0] 4333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! 
4343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Ni,${N3}[1] 4353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! 4373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 $temp,$temp,`&Dhi("$Temp")` 4383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $Z,$Z,$Z 4393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! 4403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! 4413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$Z}, [$toutptr,:128] 4423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 4433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root b .LNEON_outer 4453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.align 4 4473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_outer: 4483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {${Bi}[0]}, [$bptr,:32]! 4493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr 4503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$A0-$A3}, [$aptr]! 4513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $zero,$zero,$zero 4523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $toutptr,sp 4533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 $Bi,$zero 4543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $inner,$num,#8 4553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp 4563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Bi,${A0}[0] 4583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A3xB-$A4xB},[$tinptr,:256]! 
4593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Bi,${A0}[1] 4603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Bi,${A1}[0] 4613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A5xB-$A6xB},[$tinptr,:256]! 4623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Bi,${A1}[1] 4633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshl.i64 $temp,`&Dhi("$A0xB")`,#16 4653f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor $zero,$zero,$zero 4663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 $temp,$temp,`&Dlo("$A0xB")` 4673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A7xB},[$tinptr,:128]! 4683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmul.u32 $Ni,$temp,$M0 4693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Bi,${A2}[0] 4713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$N0-$N3}, [$nptr]! 4723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Bi,${A2}[1] 4733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Bi,${A3}[0] 4743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 $Ni,$zero 4753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Bi,${A3}[1] 4763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_inner: 4783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Ni,${N0}[0] 4793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$A0-$A3}, [$aptr]! 
4803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Ni,${N0}[1] 4813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subs $inner,$inner,#8 4823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Ni,${N1}[0] 4833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Ni,${N1}[1] 4843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! 4853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Ni,${N2}[0] 4873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A0xB}, [$tinptr, :128]! 4883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Ni,${N2}[1] 4893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! 4903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Ni,${N3}[0] 4913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! 4923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Ni,${N3}[1] 4933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! 4943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 4953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Bi,${A0}[0] 4963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! 4973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Bi,${A0}[1] 4983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! 4993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Bi,${A1}[0] 5003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! 5013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Bi,${A1}[1] 5023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.32 {$N0-$N3}, [$nptr]! 
5033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Bi,${A2}[0] 5053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A7xB}, [$tinptr, :128]! 5063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Bi,${A2}[1] 5073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Bi,${A3}[0] 5083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Bi,${A3}[1] 5093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_inner 5113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A0xB,$Ni,${N0}[0] 5133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $tinptr,sp,#16 5143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A1xB,$Ni,${N0}[1] 5153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr 5163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A2xB,$Ni,${N1}[0] 5173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$Temp}, [sp,:128] 5183f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A3xB,$Ni,${N1}[1] 5193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subs $outer,$outer,#1 5203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A4xB,$Ni,${N2}[0] 5223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! 5233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A5xB,$Ni,${N2}[1] 5243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A0xB}, [$tinptr, :128]! 5253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 5263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! 
5273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A6xB,$Ni,${N3}[0] 5283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! 5293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vmlal.u32 $A7xB,$Ni,${N3}[1] 5303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! 5323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 $temp,$temp,`&Dhi("$Temp")` 5333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! 5343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,$temp,#16 5353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_outer 5373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $toutptr,sp 5393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $inner,$num 5403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_tail: 5423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp 5433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! 5443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A0xB")`,#16 5453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp 5463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! 5473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A0xB")`,#16 5483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A7xB}, [$tinptr, :128]! 
5493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` 5503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_tail2: 5523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp 5533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]! 5543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A1xB")`,#16 5553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp 5563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A1xB")`,#16 5573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")` 5583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp 5603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]! 5613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A2xB")`,#16 5623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp 5633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A2xB")`,#16 5643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")` 5653f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp 5673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]! 
5683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A3xB")`,#16 5693f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp 5703f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A3xB")`,#16 5713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")` 5723f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp 5743f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]! 5753f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A4xB")`,#16 5763f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp 5773f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A4xB")`,#16 5783f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")` 5793f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5803f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp 5813f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]! 5823f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A5xB")`,#16 5833f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp 5843f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A5xB")`,#16 5853f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")` 5863f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5873f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp 5883f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]! 
5893f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A6xB")`,#16 5903f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp 5913f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A0xB}, [$tinptr, :128]! 5923f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A6xB")`,#16 5933f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")` 5943f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 5953f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp 5963f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]! 5973f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dlo("$A7xB")`,#16 5983f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp 5993f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! 6003f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vshr.u64 $temp,`&Dhi("$A7xB")`,#16 6013f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")` 6023f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subs $inner,$inner,#8 6033f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]! 
6043f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6053f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_tail 6063f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6073f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit 6083f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr 6093f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root subs $aptr,sp,#0 @ clear carry flag 6103f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root add $bptr,sp,$num,lsl#2 6113f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6123f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_sub: 6133f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia $aptr!, {r4-r7} 6143f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia $nptr!, {r8-r11} 6153f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sbcs r8, r4,r8 6163f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sbcs r9, r5,r9 6173f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sbcs r10,r6,r10 6183f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sbcs r11,r7,r11 6193f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root teq $aptr,$bptr @ preserves carry 6203f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root stmia $rptr!, {r8-r11} 6213f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_sub 6223f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6233f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldr r10, [$aptr] @ load top-most bit 6243f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor q0,q0,q0 6253f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub r11,$bptr,sp @ this is num*4 6263f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root veor q1,q1,q1 6273f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $aptr,sp 6283f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $rptr,$rptr,r11 @ rewind $rptr 6293f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root mov $nptr,$bptr @ second 3/4th of frame 6303f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny 
Root sbcs r10,r10,#0 @ result is carry flag 6313f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6323f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.LNEON_copy_n_zap: 6333f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia $aptr!, {r4-r7} 6343f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia $rptr, {r8-r11} 6353f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r8, r4 6363f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {q0-q1}, [$nptr,:256]! @ wipe 6373f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r9, r5 6383f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r10,r6 6393f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {q0-q1}, [$nptr,:256]! @ wipe 6403f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r11,r7 6413f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia $aptr, {r4-r7} 6423f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root stmia $rptr!, {r8-r11} 6433f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub $aptr,$aptr,#16 6443f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia $rptr, {r8-r11} 6453f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r8, r4 6463f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {q0-q1}, [$aptr,:256]! @ wipe 6473f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r9, r5 6483f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r10,r6 6493f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vst1.64 {q0-q1}, [$nptr,:256]! 
@ wipe 6503f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root movcc r11,r7 6513f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root teq $aptr,$bptr @ preserves carry 6523f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root stmia $rptr!, {r8-r11} 6533f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root bne .LNEON_copy_n_zap 6543f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root 6553f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root sub sp,ip,#96 6563f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root vldmia sp!,{d8-d15} 6573f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ldmia sp!,{r4-r11} 6583f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root ret @ bx lr 6593f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon 6603f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#endif 6613f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root___ 6623f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root} 6633f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code.=<<___; 6643f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" 665221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.align 2 6663f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#if __ARM_ARCH__>=7 6673f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root.comm OPENSSL_armcap_P,4,4 6683f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root#endif 669656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project___ 670656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project 6713f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code =~ s/\`([^\`]*)\`/eval $1/gem; 672656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Project$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 6733f9e6ada2c9f7183a41081263585e6a70bbd9f59Kenny Root$code =~ s/\bret\b/bx lr/gm; 674656d9c7f52f88b3a3daccafa7655dec086c4756eThe Android Open Source Projectprint $code; 
# Flush and close the stream the generated assembly was printed to.
# Buffered write errors (full disk, broken pipe) only surface when the
# handle is closed, so check the result and fail loudly instead of
# exiting successfully with truncated output.
close STDOUT or die "error closing STDOUT: $!";