#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# December 2005
#
# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
# for undertaking the effort are multiple. First of all, UltraSPARC is
# not the whole SPARCv9 universe and other VIS-free implementations
# deserve optimized code as much. Secondly, newly introduced UltraSPARC
# T1, a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
# several integrated RSA/DSA accelerator circuits accessible through
# kernel driver [only(*)], but having decent user-land software
# implementation is important too. Finally, reasons like desire to
# experiment with dedicated squaring procedure. Yes, this module
# implements one, because it was easiest to draft it in SPARCv9
# instructions...

# (*)	Engine accessing the driver in question is on my TODO list.
#	For reference, accelerator is estimated to give 6 to 10 times
#	improvement on single-threaded RSA sign. It should be noted
#	that 6-10x improvement coefficient does not actually mean
#	something extraordinary in terms of absolute [single-threaded]
#	performance, as SPARCv9 instruction set is by all means least
#	suitable for high performance crypto among other 64 bit
#	platforms. 6-10x factor simply places T1 in same performance
#	domain as say AMD64 and IA-64. Improvement of RSA verify doesn't
#	appear impressive at all, but it's the sign operation which is
#	far more critical/interesting.

# You might notice that inner loops are modulo-scheduled:-) This has
# essentially negligible impact on UltraSPARC performance, it's
# Fujitsu SPARC64 V users who should notice and hopefully appreciate
# the advantage... Currently this module surpasses sparcv9a-mont.pl
# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
# module still has hidden potential [see TODO list there], which is
# estimated to be larger than 20%...

# Register assignment for the generated routine. The incoming arguments
# are named after the C prototype they correspond to:
#
# int bn_mul_mont(
$rp="%i0";	# BN_ULONG *rp,
$ap="%i1";	# const BN_ULONG *ap,
$bp="%i2";	# const BN_ULONG *bp,
$np="%i3";	# const BN_ULONG *np,
$n0="%i4";	# const BN_ULONG *n0,
$num="%i5";	# int num);

# Target ABI selection: default to 32-bit, switch to 64-bit when any
# command-line argument contains -m64 or -xarch=v9. $bias and $frame are
# interpolated into the stack-pointer arithmetic of the assembly below.
$bits=32;
for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
if ($bits==64)	{ $bias=2047; $frame=192; }
else		{ $bias=0;    $frame=128; }

# Scratch registers: carries and accumulators.
$car0="%o0";
$car1="%o1";
$car2="%o2";	# 1 bit
$acc0="%o3";
$acc1="%o4";
$mask="%g1";	# 32 bits, what a waste...
$tmp0="%g4";
$tmp1="%g5";

# Loop counters (byte offsets), per-iteration multipliers, the pointer
# into the temporary vector tp[] and the current ap/np/tp words.
$i="%l0";
$j="%l1";
$mul0="%l2";
$mul1="%l3";
$tp="%l4";
$apj="%l5";
$npj="%l6";
$tpj="%l7";

$fname="bn_mul_mont_int";

# First part of the generated assembly: entry point and general
# multiplication path.
#
#   $fname      returns 0 in %o0 without opening a register window when
#               num (%o5) is below 4 words; otherwise falls to .Lenter.
#   .Lenter     opens the frame, scales num to bytes, loads *n0, lowers
#               %sp by num bytes (rounded down to a 1024-byte boundary)
#               to hold tp[], and branches to .Lbn_sqr_mont when ap == bp.
#   .L1st       first pass over ap[]/np[] with bp[0], storing tp[].
#   .Louter /
#   .Linner     remaining passes, one per bp[i], accumulating into tp[].
#   .Ltail /
#   .Lsub       word-wise tp[]-np[] with borrow, written to rp[].
#   .Lcopy      copies from either tp[] or rp[] (chosen from the final
#               borrow/overflow bit) into rp[] and zeroes tp[]; the
#               routine then returns 1 in %i0.
#
# The instruction order is modulo-scheduled and fills branch delay
# slots; do not reorder. The back-quoted expression and "$bias+$frame"
# are emitted literally by this heredoc.
# NOTE(review): presumably they are evaluated by a post-processing
# substitution later in the file - confirm, that code is not visible here.
$code=<<___;
.section	".text",#alloc,#execinstr

.global	$fname
.align	32
$fname:
	cmp	%o5,4			! 128 bits minimum
	bge,pt	%icc,.Lenter
	sethi	%hi(0xffffffff),$mask
	retl
	clr	%o0
.align	32
.Lenter:
	save	%sp,-$frame,%sp
	sll	$num,2,$num		! num*=4
	or	$mask,%lo(0xffffffff),$mask
	ld	[$n0],$n0
	cmp	$ap,$bp
	and	$num,$mask,$num
	ld	[$bp],$mul0		! bp[0]
	nop

	add	%sp,$bias,%o7		! real top of stack
	ld	[$ap],$car0		! ap[0] ! redundant in squaring context
	sub	%o7,$num,%o7
	ld	[$ap+4],$apj		! ap[1]
	and	%o7,-1024,%o7
	ld	[$np],$car1		! np[0]
	sub	%o7,$bias,%sp		! alloca
	ld	[$np+4],$npj		! np[1]
	be,pt	`$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
	mov	12,$j

	mulx	$car0,$mul0,$car0	! ap[0]*bp[0]
	mulx	$apj,$mul0,$tmp0	!prologue! ap[1]*bp[0]
	and	$car0,$mask,$acc0
	add	%sp,$bias+$frame,$tp
	ld	[$ap+8],$apj		!prologue!

	mulx	$n0,$acc0,$mul1		! "t[0]"*n0
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1	! np[0]*"t[0]"*n0
	mulx	$npj,$mul1,$acc1	!prologue! np[1]*"t[0]"*n0
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.L1st:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]
	cmp	$j,$num
	mov	$tmp1,$acc1
	srlx	$car1,32,$car1
	bl	%icc,.L1st
	add	$tp,4,$tp		! tp++
!.L1st

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]
	srlx	$car1,32,$car1

	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car1

	add	$car0,$car1,$car1
	st	$car1,[$tp+8]
	srlx	$car1,32,$car2

	mov	4,$i			! i++
	ld	[$bp+4],$mul0		! bp[1]
.Louter:
	add	%sp,$bias+$frame,$tp
	ld	[$ap],$car0		! ap[0]
	ld	[$ap+4],$apj		! ap[1]
	ld	[$np],$car1		! np[0]
	ld	[$np+4],$npj		! np[1]
	ld	[$tp],$tmp1		! tp[0]
	ld	[$tp+4],$tpj		! tp[1]
	mov	12,$j

	mulx	$car0,$mul0,$car0
	mulx	$apj,$mul0,$tmp0	!prologue!
	add	$tmp1,$car0,$car0
	ld	[$ap+8],$apj		!prologue!
	and	$car0,$mask,$acc0

	mulx	$n0,$acc0,$mul1
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1
	mulx	$npj,$mul1,$acc1	!prologue!
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.Linner:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	add	$acc0,$car0,$car0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	and	$car0,$mask,$acc0
	ld	[$tp+8],$tpj		! tp[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1
	mov	$tmp1,$acc1
	cmp	$j,$num
	bl	%icc,.Linner
	add	$tp,4,$tp		! tp++
!.Linner

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	add	$acc0,$car0,$car0
	ld	[$tp+8],$tpj		! tp[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1

	add	$tpj,$car0,$car0
	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]		! tp[j-1]
	srlx	$car0,32,$car0
	add	$i,4,$i			! i++
	srlx	$car1,32,$car1

	add	$car0,$car1,$car1
	cmp	$i,$num
	add	$car2,$car1,$car1
	st	$car1,[$tp+8]

	srlx	$car1,32,$car2
	bl,a	%icc,.Louter
	ld	[$bp+$i],$mul0		! bp[i]
!.Louter

	add	$tp,12,$tp

.Ltail:
	add	$np,$num,$np
	add	$rp,$num,$rp
	mov	$tp,$ap
	sub	%g0,$num,%o7		! k=-num
	ba	.Lsub
	subcc	%g0,%g0,%g0		! clear %icc.c
.align	16
.Lsub:
	ld	[$tp+%o7],%o0
	ld	[$np+%o7],%o1
	subccc	%o0,%o1,%o1		! tp[j]-np[j]
	add	$rp,%o7,$i
	add	%o7,4,%o7
	brnz	%o7,.Lsub
	st	%o1,[$i]
	subc	$car2,0,$car2		! handle upmost overflow bit
	and	$tp,$car2,$ap
	andn	$rp,$car2,$np
	or	$ap,$np,$ap
	sub	%g0,$num,%o7

.Lcopy:
	ld	[$ap+%o7],%o0		! copy or in-place refresh
	st	%g0,[$tp+%o7]		! zap tp
	st	%o0,[$rp+%o7]
	add	%o7,4,%o7
	brnz	%o7,.Lcopy
	nop
	mov	1,%i0
	ret
	restore
___

########
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
######## code without following dedicated squaring procedure.
292480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org######## 293480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org$sbit="%i2"; # re-use $bp! 294480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 295480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org$code.=<<___; 296480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.align 32 297480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lbn_sqr_mont: 298480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $mul0,$mul0,$car0 ! ap[0]*ap[0] 299480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $apj,$mul0,$tmp0 !prologue! 300480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 301480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add %sp,$bias+$frame,$tp 302480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+8],$apj !prologue! 303480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 304480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $n0,$acc0,$mul1 ! "t[0]"*n0 305480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 306480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $mul1,$mask,$mul1 307480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 308480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0 309480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 !prologue! 310480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,1,$sbit 311480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+8],$npj !prologue! 312480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,1,$car0 313480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 314480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 315480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mov $tmp0,$acc0 !prologue! 
316480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 317480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lsqr_1st: 318480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $apj,$mul0,$tmp0 319480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$tmp1 320480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car0,$car0 ! ap[j]*a0+c0 321480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 322480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+$j],$apj ! ap[j] 323480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 324480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+$j],$npj ! np[j] 325480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 326480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$acc0,$acc0 327480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$acc0,$acc0 328480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mov $tmp1,$acc1 329480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $acc0,32,$sbit 330480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $j,4,$j ! j++ 331480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $acc0,$mask,$acc0 332480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org cmp $j,$num 333480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 334480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] 335480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mov $tmp0,$acc0 336480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 337480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org bl %icc,.Lsqr_1st 338480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tp,4,$tp ! 
tp++ 339480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org!.Lsqr_1st 340480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 341480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $apj,$mul0,$tmp0 ! epilogue 342480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$tmp1 343480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car0,$car0 ! ap[j]*a0+c0 344480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 345480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 346480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 347480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$acc0,$acc0 348480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$acc0,$acc0 349480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $acc0,32,$sbit 350480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $acc0,$mask,$acc0 351480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 352480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] 353480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 354480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 355480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tmp0,$car0,$car0 ! 
ap[j]*a0+c0 356480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tmp1,$car1,$car1 357480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 358480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 359480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$acc0,$acc0 360480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$acc0,$acc0 361480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $acc0,32,$sbit 362480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $acc0,$mask,$acc0 363480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 364480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp+4] 365480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 366480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 367480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car0,$car0 368480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$car0,$car0 369480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car1,$car1 370480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp+8] 371480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car2 372480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 373480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [%sp+$bias+$frame],$tmp0 ! tp[0] 374480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [%sp+$bias+$frame+4],$tmp1 ! tp[1] 375480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [%sp+$bias+$frame+8],$tpj ! tp[2] 376480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+4],$mul0 ! ap[1] 377480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+8],$apj ! ap[2] 378480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np],$car1 ! np[0] 379480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+4],$npj ! 
np[1] 380480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $n0,$tmp0,$mul1 381480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 382480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $mul0,$mul0,$car0 383480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $mul1,$mask,$mul1 384480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 385480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $car1,$mul1,$car1 386480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 387480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tmp0,$car1,$car1 388480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 389480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+8],$npj ! np[2] 390480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 391480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tmp1,$car1,$car1 392480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 393480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 394480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,1,$sbit 395480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 396480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,1,$car0 397480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mov 12,$j 398480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[%sp+$bias+$frame] ! 
tp[0]= 399480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 400480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add %sp,$bias+$frame+4,$tp 401480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 402480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lsqr_2nd: 403480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $apj,$mul0,$acc0 404480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 405480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car0,$car0 406480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 407480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+$j],$apj ! ap[j] 408480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 409480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+$j],$npj ! np[j] 410480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 411480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 412480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$tp+8],$tpj ! tp[j] 413480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$acc0,$acc0 414480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $j,4,$j ! j++ 415480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$acc0,$acc0 416480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $acc0,32,$sbit 417480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $acc0,$mask,$acc0 418480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org cmp $j,$num 419480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 420480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] ! 
tp[j-1] 421480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 422480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org bl %icc,.Lsqr_2nd 423480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tp,4,$tp ! tp++ 424480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org!.Lsqr_2nd 425480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 426480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $apj,$mul0,$acc0 427480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 428480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car0,$car0 429480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 430480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 431480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 432480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 433480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$acc0,$acc0 434480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$acc0,$acc0 435480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $acc0,32,$sbit 436480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $acc0,$mask,$acc0 437480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 438480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] ! 
tp[j-1] 439480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 440480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 441480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car0,$car0 442480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$car0,$car0 443480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car1,$car1 444480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car2,$car1,$car1 445480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp+4] 446480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car2 447480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 448480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [%sp+$bias+$frame],$tmp1 ! tp[0] 449480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [%sp+$bias+$frame+4],$tpj ! tp[1] 450480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+8],$mul0 ! ap[2] 451480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np],$car1 ! np[0] 452480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+4],$npj ! 
np[1] 453480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $n0,$tmp1,$mul1 454480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $mul1,$mask,$mul1 455480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mov 8,$i 456480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 457480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $mul0,$mul0,$car0 458480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $car1,$mul1,$car1 459480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 460480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tmp1,$car1,$car1 461480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 462480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add %sp,$bias+$frame,$tp 463480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 464480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,1,$sbit 465480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,1,$car0 466480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mov 4,$j 467480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 468480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lsqr_outer: 469480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lsqr_inner1: 470480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 471480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 472480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $j,4,$j 473480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$tp+8],$tpj 474480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org cmp $j,$i 475480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 476480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+$j],$npj 477480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] 478480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 
479480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org bl %icc,.Lsqr_inner1 480480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tp,4,$tp 481480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org!.Lsqr_inner1 482480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 483480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $j,4,$j 484480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+$j],$apj ! ap[j] 485480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 486480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 487480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+$j],$npj ! np[j] 488480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 489480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$tp+8],$tpj ! tp[j] 490480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 491480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] 492480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 493480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 494480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $j,4,$j 495480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org cmp $j,$num 496480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org be,pn %icc,.Lsqr_no_inner2 497480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tp,4,$tp 498480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 499480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lsqr_inner2: 500480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $apj,$mul0,$acc0 501480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 502480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 503480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car0,$car0 504480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+$j],$apj ! 
ap[j] 505480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 506480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+$j],$npj ! np[j] 507480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 508480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$acc0,$acc0 509480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$tp+8],$tpj ! tp[j] 510480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$acc0,$acc0 511480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $j,4,$j ! j++ 512480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $acc0,32,$sbit 513480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $acc0,$mask,$acc0 514480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org cmp $j,$num 515480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 516480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 517480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] ! tp[j-1] 518480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 519480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org bl %icc,.Lsqr_inner2 520480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tp,4,$tp ! 
tp++ 521480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 522480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lsqr_no_inner2: 523480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $apj,$mul0,$acc0 524480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 525480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 526480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car0,$car0 527480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 528480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 529480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$acc0,$acc0 530480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$acc0,$acc0 531480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $acc0,32,$sbit 532480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $acc0,$mask,$acc0 533480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 534480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 535480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] ! tp[j-1] 536480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 537480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 538480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car0,$car0 539480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$car0,$car0 540480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car1,$car1 541480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car2,$car1,$car1 542480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp+4] 543480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car2 544480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 545480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $i,4,$i ! 
i++ 546480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [%sp+$bias+$frame],$tmp1 ! tp[0] 547480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [%sp+$bias+$frame+4],$tpj ! tp[1] 548480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$ap+$i],$mul0 ! ap[j] 549480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np],$car1 ! np[0] 550480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+4],$npj ! np[1] 551480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $n0,$tmp1,$mul1 552480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $mul1,$mask,$mul1 553480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $i,4,$tmp0 554480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 555480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $mul0,$mul0,$car0 556480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $car1,$mul1,$car1 557480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,$mask,$acc0 558480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tmp1,$car1,$car1 559480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,32,$car0 560480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add %sp,$bias+$frame,$tp 561480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 562480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org and $car0,1,$sbit 563480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car0,1,$car0 564480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 565480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org cmp $tmp0,$num ! 
i<num-1 566480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org bl %icc,.Lsqr_outer 567480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mov 4,$j 568480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 569480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.Lsqr_last: 570480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 571480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 572480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $j,4,$j 573480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$tp+8],$tpj 574480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org cmp $j,$i 575480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 576480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ld [$np+$j],$npj 577480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] 578480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 579480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org bl %icc,.Lsqr_last 580480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tp,4,$tp 581480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org!.Lsqr_last 582480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 583480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org mulx $npj,$mul1,$acc1 584480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tpj,$car1,$car1 585480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc0,$car1,$car1 586480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $acc1,$car1,$car1 587480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp] 588480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car1 589480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 590480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car0,$car0 ! 
recover $car0 591480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org or $sbit,$car0,$car0 592480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car0,$car1,$car1 593480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $car2,$car1,$car1 594480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org st $car1,[$tp+4] 595480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org srlx $car1,32,$car2 596480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org 597480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org ba .Ltail 598480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org add $tp,8,$tp 599480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.type $fname,#function 600480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.size $fname,(.-$fname) 601480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" 602480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org.align 32 603480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org___ 604480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.org$code =~ s/\`([^\`]*)\`/eval($1)/gem; 605480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.orgprint $code; 606480da75abf485e7e2a6be5acc0f71842368792c0jnd@chromium.orgclose STDOUT; 607