#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# December 2005
#
# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
# for undertaken effort are multiple. First of all, UltraSPARC is not
# the whole SPARCv9 universe and other VIS-free implementations deserve
# optimized code as much. Secondly, newly introduced UltraSPARC T1,
# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
# several integrated RSA/DSA accelerator circuits accessible through
# kernel driver [only(*)], but having decent user-land software
# implementation is important too. Finally, reasons like desire to
# experiment with dedicated squaring procedure. Yes, this module
# implements one, because it was easiest to draft it in SPARCv9
# instructions...

# (*)	Engine accessing the driver in question is on my TODO list.
#	For reference, accelerator is estimated to give 6 to 10 times
#	improvement on single-threaded RSA sign. It should be noted
#	that 6-10x improvement coefficient does not actually mean
#	something extraordinary in terms of absolute [single-threaded]
#	performance, as SPARCv9 instruction set is by all means least
#	suitable for high performance crypto among other 64 bit
#	platforms. 6-10x factor simply places T1 in same performance
#	domain as say AMD64 and IA-64. Improvement of RSA verify doesn't
#	appear impressive at all, but it's the sign operation which is
#	far more critical/interesting.

# You might notice that inner loops are modulo-scheduled:-) This has
# essentially negligible impact on UltraSPARC performance, it's
# Fujitsu SPARC64 V users who should notice and hopefully appreciate
# the advantage... Currently this module surpasses sparcv9a-mont.pl
# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
# module still has hidden potential [see TODO list there], which is
# estimated to be larger than 20%...

# Register names holding the arguments of the generated routine. They are
# %i registers because the emitted code opens a new register window with
# "save" before using them.
#
# int bn_mul_mont(
$rp="%i0";	# BN_ULONG *rp,
$ap="%i1";	# const BN_ULONG *ap,
$bp="%i2";	# const BN_ULONG *bp,
$np="%i3";	# const BN_ULONG *np,
$n0="%i4";	# const BN_ULONG *n0,
$num="%i5";	# int num);

# Target selection: 32-bit output by default, 64-bit when -m64 or
# -xarch=v9 appears among the command-line arguments.
#   $bias  - offset the emitted code adds to %sp to get the real top of stack
#   $frame - frame size the emitted code passes to "save"
$bits=32;
for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
if ($bits==64)	{ $bias=2047; $frame=192; }
else		{ $bias=0;    $frame=128; }

# Carry words, accumulators and the 32-bit mask used by the emitted loops.
$car0="%o0";
$car1="%o1";
$car2="%o2";	# 1 bit
$acc0="%o3";
$acc1="%o4";
$mask="%g1";	# 32 bits, what a waste...
# Scratch registers for products that are one iteration "in flight".
$tmp0="%g4";
$tmp1="%g5";

# $i/$j are byte offsets into bp[] and ap[]/np[] (they advance by 4 and are
# compared against num*4); $mul0/$mul1 are the current multipliers; $tp
# walks the temporary vector; $apj/$npj/$tpj hold the words loaded for the
# next step.
$i="%l0";
$j="%l1";
$mul0="%l2";
$mul1="%l3";
$tp="%l4";
$apj="%l5";
$npj="%l6";
$tpj="%l7";

# Global symbol under which the routine is emitted.
$fname="bn_mul_mont_int";

# General multiplication path. Outline of the assembly emitted below:
#   $fname    - returns 0 immediately when num (%o5) is below 4 words;
#   .Lenter   - opens a register window, turns num into a byte count,
#               loads n0, bp[0], ap[0..1] and np[0..1], lowers %sp by num
#               bytes (rounded down to a 1024-byte boundary) to make room
#               for tp[], and branches to .Lbn_sqr_mont when ap == bp;
#   .L1st     - first pass over ap[]/np[] using bp[0], storing into tp[];
#   .Louter / .Linner - one pass per remaining bp[i], accumulating into tp[];
#   .Ltail / .Lsub    - writes tp[]-np[] to rp[] while propagating the borrow;
#   .Lcopy    - picks tp[] or the freshly written rp[] as source via a mask
#               built from the top carry bit and the borrow, copies it to
#               rp[], zeroes tp[], and returns 1.
# The instruction order is deliberate (software-pipelined loops and branch
# delay slots); do not reorder lines inside the here-document.
$code=<<___;
.section	".text",#alloc,#execinstr

.global	$fname
.align	32
$fname:
	cmp	%o5,4			! 128 bits minimum
	bge,pt	%icc,.Lenter
	sethi	%hi(0xffffffff),$mask
	retl
	clr	%o0
.align	32
.Lenter:
	save	%sp,-$frame,%sp
	sll	$num,2,$num		! num*=4
	or	$mask,%lo(0xffffffff),$mask
	ld	[$n0],$n0
	cmp	$ap,$bp
	and	$num,$mask,$num
	ld	[$bp],$mul0		! bp[0]
	nop

	add	%sp,$bias,%o7		! real top of stack
	ld	[$ap],$car0		! ap[0] ! redundant in squaring context
	sub	%o7,$num,%o7
	ld	[$ap+4],$apj		! ap[1]
	and	%o7,-1024,%o7
	ld	[$np],$car1		! np[0]
	sub	%o7,$bias,%sp		! alloca
	ld	[$np+4],$npj		! np[1]
	be,pt	`$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
	mov	12,$j

	mulx	$car0,$mul0,$car0	! ap[0]*bp[0]
	mulx	$apj,$mul0,$tmp0	!prologue! ap[1]*bp[0]
	and	$car0,$mask,$acc0
	add	%sp,$bias+$frame,$tp
	ld	[$ap+8],$apj		!prologue!

	mulx	$n0,$acc0,$mul1		! "t[0]"*n0
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1	! np[0]*"t[0]"*n0
	mulx	$npj,$mul1,$acc1	!prologue! np[1]*"t[0]"*n0
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.L1st:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]
	cmp	$j,$num
	mov	$tmp1,$acc1
	srlx	$car1,32,$car1
	bl	%icc,.L1st
	add	$tp,4,$tp		! tp++
!.L1st

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]
	srlx	$car1,32,$car1

	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car1

	add	$car0,$car1,$car1
	st	$car1,[$tp+8]
	srlx	$car1,32,$car2

	mov	4,$i			! i++
	ld	[$bp+4],$mul0		! bp[1]
.Louter:
	add	%sp,$bias+$frame,$tp
	ld	[$ap],$car0		! ap[0]
	ld	[$ap+4],$apj		! ap[1]
	ld	[$np],$car1		! np[0]
	ld	[$np+4],$npj		! np[1]
	ld	[$tp],$tmp1		! tp[0]
	ld	[$tp+4],$tpj		! tp[1]
	mov	12,$j

	mulx	$car0,$mul0,$car0
	mulx	$apj,$mul0,$tmp0	!prologue!
	add	$tmp1,$car0,$car0
	ld	[$ap+8],$apj		!prologue!
	and	$car0,$mask,$acc0

	mulx	$n0,$acc0,$mul1
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1
	mulx	$npj,$mul1,$acc1	!prologue!
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.Linner:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	add	$acc0,$car0,$car0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	and	$car0,$mask,$acc0
	ld	[$tp+8],$tpj		! tp[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1
	mov	$tmp1,$acc1
	cmp	$j,$num
	bl	%icc,.Linner
	add	$tp,4,$tp		! tp++
!.Linner

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	add	$acc0,$car0,$car0
	ld	[$tp+8],$tpj		! tp[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1

	add	$tpj,$car0,$car0
	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]		! tp[j-1]
	srlx	$car0,32,$car0
	add	$i,4,$i			! i++
	srlx	$car1,32,$car1

	add	$car0,$car1,$car1
	cmp	$i,$num
	add	$car2,$car1,$car1
	st	$car1,[$tp+8]

	srlx	$car1,32,$car2
	bl,a	%icc,.Louter
	ld	[$bp+$i],$mul0		! bp[i]
!.Louter

	add	$tp,12,$tp

.Ltail:
	add	$np,$num,$np
	add	$rp,$num,$rp
	mov	$tp,$ap
	sub	%g0,$num,%o7		! k=-num
	ba	.Lsub
	subcc	%g0,%g0,%g0		! clear %icc.c
.align	16
.Lsub:
	ld	[$tp+%o7],%o0
	ld	[$np+%o7],%o1
	subccc	%o0,%o1,%o1		! tp[j]-np[j]
	add	$rp,%o7,$i
	add	%o7,4,%o7
	brnz	%o7,.Lsub
	st	%o1,[$i]
	subc	$car2,0,$car2		! handle upmost overflow bit
	and	$tp,$car2,$ap
	andn	$rp,$car2,$np
	or	$ap,$np,$ap
	sub	%g0,$num,%o7

.Lcopy:
	ld	[$ap+%o7],%o0		! copy or in-place refresh
	st	%g0,[$tp+%o7]		! zap tp
	st	%o0,[$rp+%o7]
	add	%o7,4,%o7
	brnz	%o7,.Lcopy
	nop
	mov	1,%i0
	ret
	restore
___

########
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
######## code without following dedicated squaring procedure.
292221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom######## 293221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$sbit="%i2"; # re-use $bp! 294221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 295221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$code.=<<___; 296221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.align 32 297221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lbn_sqr_mont: 298221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $mul0,$mul0,$car0 ! ap[0]*ap[0] 299221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $apj,$mul0,$tmp0 !prologue! 300221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 301221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add %sp,$bias+$frame,$tp 302221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+8],$apj !prologue! 303221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 304221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $n0,$acc0,$mul1 ! "t[0]"*n0 305221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 306221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $mul1,$mask,$mul1 307221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 308221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0 309221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 !prologue! 310221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,1,$sbit 311221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+8],$npj !prologue! 312221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,1,$car0 313221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 314221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 315221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mov $tmp0,$acc0 !prologue! 
316221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 317221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_1st: 318221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $apj,$mul0,$tmp0 319221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$tmp1 320221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car0,$car0 ! ap[j]*a0+c0 321221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 322221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+$j],$apj ! ap[j] 323221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 324221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+$j],$npj ! np[j] 325221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 326221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$acc0,$acc0 327221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$acc0,$acc0 328221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mov $tmp1,$acc1 329221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $acc0,32,$sbit 330221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $j,4,$j ! j++ 331221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $acc0,$mask,$acc0 332221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom cmp $j,$num 333221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 334221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] 335221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mov $tmp0,$acc0 336221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 337221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom bl %icc,.Lsqr_1st 338221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tp,4,$tp ! 
tp++ 339221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_1st 340221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 341221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $apj,$mul0,$tmp0 ! epilogue 342221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$tmp1 343221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car0,$car0 ! ap[j]*a0+c0 344221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 345221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 346221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 347221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$acc0,$acc0 348221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$acc0,$acc0 349221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $acc0,32,$sbit 350221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $acc0,$mask,$acc0 351221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 352221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] 353221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 354221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 355221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tmp0,$car0,$car0 ! 
ap[j]*a0+c0 356221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tmp1,$car1,$car1 357221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 358221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 359221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$acc0,$acc0 360221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$acc0,$acc0 361221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $acc0,32,$sbit 362221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $acc0,$mask,$acc0 363221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 364221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp+4] 365221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 366221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 367221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car0,$car0 368221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$car0,$car0 369221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car1,$car1 370221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp+8] 371221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car2 372221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 373221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [%sp+$bias+$frame],$tmp0 ! tp[0] 374221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [%sp+$bias+$frame+4],$tmp1 ! tp[1] 375221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [%sp+$bias+$frame+8],$tpj ! tp[2] 376221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+4],$mul0 ! ap[1] 377221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+8],$apj ! ap[2] 378221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np],$car1 ! np[0] 379221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+4],$npj ! 
np[1] 380221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $n0,$tmp0,$mul1 381221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 382221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $mul0,$mul0,$car0 383221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $mul1,$mask,$mul1 384221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 385221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $car1,$mul1,$car1 386221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 387221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tmp0,$car1,$car1 388221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 389221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+8],$npj ! np[2] 390221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 391221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tmp1,$car1,$car1 392221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 393221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 394221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,1,$sbit 395221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 396221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,1,$car0 397221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mov 12,$j 398221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[%sp+$bias+$frame] ! 
tp[0]= 399221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 400221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add %sp,$bias+$frame+4,$tp 401221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 402221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_2nd: 403221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $apj,$mul0,$acc0 404221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 405221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car0,$car0 406221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 407221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+$j],$apj ! ap[j] 408221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 409221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+$j],$npj ! np[j] 410221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 411221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 412221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$tp+8],$tpj ! tp[j] 413221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$acc0,$acc0 414221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $j,4,$j ! j++ 415221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$acc0,$acc0 416221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $acc0,32,$sbit 417221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $acc0,$mask,$acc0 418221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom cmp $j,$num 419221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 420221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] ! tp[j-1] 421221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 422221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom bl %icc,.Lsqr_2nd 423221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tp,4,$tp ! 
tp++ 424221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_2nd 425221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 426221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $apj,$mul0,$acc0 427221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 428221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car0,$car0 429221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 430221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 431221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 432221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 433221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$acc0,$acc0 434221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$acc0,$acc0 435221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $acc0,32,$sbit 436221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $acc0,$mask,$acc0 437221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 438221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] ! tp[j-1] 439221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 440221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 441221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car0,$car0 442221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$car0,$car0 443221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car1,$car1 444221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car2,$car1,$car1 445221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp+4] 446221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car2 447221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 448221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [%sp+$bias+$frame],$tmp1 ! 
tp[0] 449221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [%sp+$bias+$frame+4],$tpj ! tp[1] 450221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+8],$mul0 ! ap[2] 451221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np],$car1 ! np[0] 452221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+4],$npj ! np[1] 453221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $n0,$tmp1,$mul1 454221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $mul1,$mask,$mul1 455221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mov 8,$i 456221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 457221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $mul0,$mul0,$car0 458221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $car1,$mul1,$car1 459221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 460221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tmp1,$car1,$car1 461221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 462221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add %sp,$bias+$frame,$tp 463221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 464221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,1,$sbit 465221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,1,$car0 466221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mov 4,$j 467221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 468221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_outer: 469221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_inner1: 470221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 471221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 472221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $j,4,$j 473221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$tp+8],$tpj 474221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 
cmp $j,$i 475221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 476221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+$j],$npj 477221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] 478221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 479221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom bl %icc,.Lsqr_inner1 480221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tp,4,$tp 481221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_inner1 482221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 483221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $j,4,$j 484221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+$j],$apj ! ap[j] 485221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 486221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 487221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+$j],$npj ! np[j] 488221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 489221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$tp+8],$tpj ! 
tp[j] 490221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 491221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] 492221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 493221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 494221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $j,4,$j 495221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom cmp $j,$num 496221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom be,pn %icc,.Lsqr_no_inner2 497221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tp,4,$tp 498221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 499221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_inner2: 500221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $apj,$mul0,$acc0 501221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 502221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 503221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car0,$car0 504221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+$j],$apj ! ap[j] 505221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 506221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+$j],$npj ! np[j] 507221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 508221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$acc0,$acc0 509221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$tp+8],$tpj ! tp[j] 510221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$acc0,$acc0 511221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $j,4,$j ! 
j++ 512221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $acc0,32,$sbit 513221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $acc0,$mask,$acc0 514221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom cmp $j,$num 515221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 516221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 517221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] ! tp[j-1] 518221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 519221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom bl %icc,.Lsqr_inner2 520221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tp,4,$tp ! tp++ 521221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 522221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_no_inner2: 523221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $apj,$mul0,$acc0 524221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 525221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 526221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car0,$car0 527221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 528221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 529221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$acc0,$acc0 530221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$acc0,$acc0 531221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $acc0,32,$sbit 532221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $acc0,$mask,$acc0 533221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 534221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 535221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] ! 
tp[j-1] 536221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 537221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 538221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car0,$car0 539221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$car0,$car0 540221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car1,$car1 541221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car2,$car1,$car1 542221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp+4] 543221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car2 544221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 545221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $i,4,$i ! i++ 546221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [%sp+$bias+$frame],$tmp1 ! tp[0] 547221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [%sp+$bias+$frame+4],$tpj ! tp[1] 548221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$ap+$i],$mul0 ! ap[j] 549221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np],$car1 ! np[0] 550221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+4],$npj ! 
np[1] 551221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $n0,$tmp1,$mul1 552221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $mul1,$mask,$mul1 553221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $i,4,$tmp0 554221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 555221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $mul0,$mul0,$car0 556221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $car1,$mul1,$car1 557221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,$mask,$acc0 558221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tmp1,$car1,$car1 559221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,32,$car0 560221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add %sp,$bias+$frame,$tp 561221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 562221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom and $car0,1,$sbit 563221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car0,1,$car0 564221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 565221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom cmp $tmp0,$num ! 
i<num-1 566221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom bl %icc,.Lsqr_outer 567221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mov 4,$j 568221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 569221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_last: 570221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 571221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 572221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $j,4,$j 573221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$tp+8],$tpj 574221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom cmp $j,$i 575221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 576221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ld [$np+$j],$npj 577221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] 578221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 579221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom bl %icc,.Lsqr_last 580221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tp,4,$tp 581221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_last 582221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 583221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom mulx $npj,$mul1,$acc1 584221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tpj,$car1,$car1 585221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc0,$car1,$car1 586221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $acc1,$car1,$car1 587221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp] 588221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car1 589221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 590221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car0,$car0 ! 
recover $car0 591221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom or $sbit,$car0,$car0 592221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car0,$car1,$car1 593221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $car2,$car1,$car1 594221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom st $car1,[$tp+4] 595221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom srlx $car1,32,$car2 596221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom 597221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom ba .Ltail 598221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom add $tp,8,$tp 599221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.type $fname,#function 600221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.size $fname,(.-$fname) 601221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" 602221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.align 32 603221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom___ 604221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$code =~ s/\`([^\`]*)\`/eval($1)/gem; 605221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromprint $code; 606221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromclose STDOUT; 607