#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# December 2005
#
# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
# for undertaking this effort are multiple. First of all, UltraSPARC is
# not the whole SPARCv9 universe and other VIS-free implementations
# deserve optimized code as much. Secondly, the newly introduced
# UltraSPARC T1, a.k.a. Niagara, has a shared FPU, and concurrent
# FPU-intensive paths, such as sparcv9a-mont, will simply sink it. Yes,
# T1 is equipped with several integrated RSA/DSA accelerator circuits
# accessible through a kernel driver [only(*)], but having a decent
# user-land software implementation is important too. Finally, there
# was the desire to experiment with a dedicated squaring procedure.
# Yes, this module implements one, because it was easiest to draft it
# in SPARCv9 instructions...

# (*)	Engine accessing the driver in question is on my TODO list.
#	For reference, the accelerator is estimated to give a 6 to 10
#	times improvement on single-threaded RSA sign. It should be
#	noted that a 6-10x improvement coefficient does not actually
#	mean something extraordinary in terms of absolute
#	[single-threaded] performance, as the SPARCv9 instruction set
#	is by all means the least suitable for high performance crypto
#	among other 64-bit platforms. The 6-10x factor simply places T1
#	in the same performance domain as, say, AMD64 and IA-64. The
#	improvement of RSA verify doesn't appear impressive at all, but
#	it's the sign operation which is far more critical/interesting.

# You might notice that inner loops are modulo-scheduled:-) This has
# essentially negligible impact on UltraSPARC performance, it's
# Fujitsu SPARC64 V users who should notice and hopefully appreciate
# the advantage... Currently this module surpasses sparcv9a-mont.pl
# by ~20% on UltraSPARC-III and later cores, but recall that the
# sparcv9a module still has hidden potential [see TODO list there],
# which is estimated to be larger than 20%...

# Argument registers, laid out so that the trailing comments read as the
# C prototype.  These are the "in" registers, i.e. the names are valid
# only after the generated code has executed its "save"; the entry check
# that runs before the "save" reaches num through %o5 instead.
#
# int bn_mul_mont(
$rp="%i0";	# BN_ULONG *rp,
$ap="%i1";	# const BN_ULONG *ap,
$bp="%i2";	# const BN_ULONG *bp,
$np="%i3";	# const BN_ULONG *np,
$n0="%i4";	# const BN_ULONG *n0,
$num="%i5";	# int num);

# ABI selection.  64-bit output is chosen when any command-line argument
# contains -m64 or -xarch=v9; otherwise 32-bit output is produced.
#   $bias  - added to %sp by the generated code to obtain the real
#            stack address (2047 for 64-bit, 0 for 32-bit)
#   $frame - byte count handed to "save" for the fixed frame, and the
#            offset from the biased %sp at which the tp[] scratch starts
$bits=32;
for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
if ($bits==64)	{ $bias=2047; $frame=192; }
else		{ $bias=0;    $frame=128; }

# Working registers of the generated code.
$car0="%o0";	# running sum/carry of the ap[j]*bp[i] chain
$car1="%o1";	# running sum/carry of the np[j]*mul1 chain
$car2="%o2";	# 1 bit
$acc0="%o3";	# low 32 bits of the ap chain / pipelined ap product
$acc1="%o4";	# pipelined np product
$mask="%g1";	# 32 bits, what a waste... (holds 0xffffffff)
$tmp0="%g4";	# freshly computed ap[j]*mul0
$tmp1="%g5";	# freshly computed np[j]*mul1

$i="%l0";	# outer loop byte offset into bp
$j="%l1";	# inner loop byte offset into ap/np
$mul0="%l2";	# current multiplier word, bp[i]
$mul1="%l3";	# low 32 bits of "t[0]"*n0 for the current pass
$tp="%l4";	# cursor into the temporary vector kept on the stack
$apj="%l5";	# prefetched ap[j]
$npj="%l6";	# prefetched np[j]
$tpj="%l7";	# prefetched tp[j]

# Global symbol under which the routine is emitted.
$fname="bn_mul_mont_int";

# First part of the emitted routine: entry, stack scratch allocation and
# the generic (ap != bp) Montgomery multiplication, followed by the
# shared tail that performs the final conditional subtraction.
#
#   entry     returns 0 without doing any work when num < 4; otherwise
#             falls into .Lenter.
#   .Lenter   opens a register window, converts num to a byte count,
#             replaces the n0 pointer by the word it points to, lowers
#             %sp by num bytes rounded down to a 1024-byte boundary to
#             make room for tp[], and branches to .Lbn_sqr_mont when the
#             ap and bp pointers are equal.
#   .L1st     pass for bp[0]; tp[] is written for the first time.
#   .Louter   one pass per remaining bp[i]; .Linner accumulates into
#   .Linner   the existing tp[].
#   .Ltail    rp = tp - np with borrow (.Lsub), then .Lcopy stores
#   .Lsub     either tp or the freshly computed rp into rp, selected by
#   .Lcopy    the borrow combined with the overflow bit, and zeroes
#             tp[].  The routine then returns 1.
#
# Each 32-bit word is processed with 64-bit mulx/add; $mask extracts the
# low word and srlx ...,32 the carry.  The loops are software pipelined:
# lines tagged !prologue! prime the pipeline and the !epilogue! section
# drains it, so instruction order must not be changed.  Backtick
# expressions in the text are presumably evaluated by a later
# substitution pass over $code that is not part of this block.
$code=<<___;
.section	".text",#alloc,#execinstr

.global	$fname
.align	32
$fname:
	cmp	%o5,4			! 128 bits minimum
	bge,pt	%icc,.Lenter
	sethi	%hi(0xffffffff),$mask
	retl				! num < 4: return 0
	clr	%o0
.align	32
.Lenter:
	save	%sp,-$frame,%sp
	sll	$num,2,$num		! num*=4
	or	$mask,%lo(0xffffffff),$mask
	ld	[$n0],$n0
	cmp	$ap,$bp
	and	$num,$mask,$num
	ld	[$bp],$mul0		! bp[0]
	nop

	add	%sp,$bias,%o7		! real top of stack
	ld	[$ap],$car0		! ap[0] ! redundant in squaring context
	sub	%o7,$num,%o7
	ld	[$ap+4],$apj		! ap[1]
	and	%o7,-1024,%o7
	ld	[$np],$car1		! np[0]
	sub	%o7,$bias,%sp		! alloca
	ld	[$np+4],$npj		! np[1]
	be,pt	`$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
	mov	12,$j

	mulx	$car0,$mul0,$car0	! ap[0]*bp[0]
	mulx	$apj,$mul0,$tmp0	!prologue! ap[1]*bp[0]
	and	$car0,$mask,$acc0
	add	%sp,$bias+$frame,$tp
	ld	[$ap+8],$apj		!prologue!

	mulx	$n0,$acc0,$mul1		! "t[0]"*n0
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1	! np[0]*"t[0]"*n0
	mulx	$npj,$mul1,$acc1	!prologue! np[1]*"t[0]"*n0
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.L1st:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]
	cmp	$j,$num
	mov	$tmp1,$acc1
	srlx	$car1,32,$car1
	bl	%icc,.L1st
	add	$tp,4,$tp		! tp++
!.L1st

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]
	srlx	$car1,32,$car1

	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car1

	! top word of the pass and the overflow bit above it
	add	$car0,$car1,$car1
	st	$car1,[$tp+8]
	srlx	$car1,32,$car2

	mov	4,$i			! i++
	ld	[$bp+4],$mul0		! bp[1]
.Louter:
	add	%sp,$bias+$frame,$tp
	ld	[$ap],$car0		! ap[0]
	ld	[$ap+4],$apj		! ap[1]
	ld	[$np],$car1		! np[0]
	ld	[$np+4],$npj		! np[1]
	ld	[$tp],$tmp1		! tp[0]
	ld	[$tp+4],$tpj		! tp[1]
	mov	12,$j

	mulx	$car0,$mul0,$car0
	mulx	$apj,$mul0,$tmp0	!prologue!
	add	$tmp1,$car0,$car0
	ld	[$ap+8],$apj		!prologue!
	and	$car0,$mask,$acc0

	mulx	$n0,$acc0,$mul1
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1
	mulx	$npj,$mul1,$acc1	!prologue!
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.Linner:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	add	$acc0,$car0,$car0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	and	$car0,$mask,$acc0
	ld	[$tp+8],$tpj		! tp[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1
	mov	$tmp1,$acc1
	cmp	$j,$num
	bl	%icc,.Linner
	add	$tp,4,$tp		! tp++
!.Linner

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	add	$acc0,$car0,$car0
	ld	[$tp+8],$tpj		! tp[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1

	add	$tpj,$car0,$car0
	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]		! tp[j-1]
	srlx	$car0,32,$car0
	add	$i,4,$i			! i++
	srlx	$car1,32,$car1

	! top word: both carries plus the overflow bit of the previous pass
	add	$car0,$car1,$car1
	cmp	$i,$num
	add	$car2,$car1,$car1
	st	$car1,[$tp+8]

	srlx	$car1,32,$car2
	bl,a	%icc,.Louter
	ld	[$bp+$i],$mul0		! bp[i]
!.Louter

	! step tp past the three words stored by the epilogue, so that the
	! negative index used below starts at the first word of tp
	add	$tp,12,$tp

.Ltail:
	add	$np,$num,$np
	add	$rp,$num,$rp
	mov	$tp,$ap
	sub	%g0,$num,%o7		! k=-num
	ba	.Lsub
	subcc	%g0,%g0,%g0		! clear %icc.c
.align	16
.Lsub:
	ld	[$tp+%o7],%o0
	ld	[$np+%o7],%o1
	subccc	%o0,%o1,%o1		! tp[j]-np[j]
	add	$rp,%o7,$i
	add	%o7,4,%o7
	brnz	%o7,.Lsub
	st	%o1,[$i]
	subc	$car2,0,$car2		! handle upmost overflow bit
	! select the copy source: tp where the mask is set, rp otherwise
	and	$tp,$car2,$ap
	andn	$rp,$car2,$np
	or	$ap,$np,$ap
	sub	%g0,$num,%o7

.Lcopy:
	ld	[$ap+%o7],%o0		! copy or in-place refresh
	st	%g0,[$tp+%o7]		! zap tp
	st	%o0,[$rp+%o7]
	add	%o7,4,%o7
	brnz	%o7,.Lcopy
	nop
	mov	1,%i0			! return 1
	ret
	restore
___

########
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
######## code without the following dedicated squaring procedure.
########
# The squaring path is entered only when the ap and bp pointers compare
# equal, so the register that held bp can carry the shifted-out bit.
$sbit="%i2";		# re-use $bp!

295221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$code.=<<___;
296221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.align	32
297221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lbn_sqr_mont:
298221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$mul0,$mul0,$car0		! ap[0]*ap[0]
299221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$apj,$mul0,$tmp0		!prologue!
300221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
301221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	%sp,$bias+$frame,$tp
302221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+8],$apj			!prologue!
303221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
304221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$n0,$acc0,$mul1			! "t[0]"*n0
305221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
306221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$mul1,$mask,$mul1
307221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
308221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$car1,$mul1,$car1		! np[0]*"t[0]"*n0
309221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1		!prologue!
310221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,1,$sbit
311221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+8],$npj			!prologue!
312221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,1,$car0
313221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
314221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
315221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	$tmp0,$acc0			!prologue!
316221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
317221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_1st:
318221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$apj,$mul0,$tmp0
319221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$tmp1
320221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car0,$car0		! ap[j]*a0+c0
321221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
322221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+$j],$apj			! ap[j]
323221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
324221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+$j],$npj			! np[j]
325221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
326221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$acc0,$acc0
327221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$acc0,$acc0
328221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	$tmp1,$acc1
329221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$acc0,32,$sbit
330221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$j,4,$j				! j++
331221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$acc0,$mask,$acc0
332221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp	$j,$num
333221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
334221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]
335221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	$tmp0,$acc0
336221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
337221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	bl	%icc,.Lsqr_1st
338221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tp,4,$tp			! tp++
339221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_1st
340221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
341221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$apj,$mul0,$tmp0		! epilogue
342221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$tmp1
343221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car0,$car0		! ap[j]*a0+c0
344221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
345221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
346221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
347221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$acc0,$acc0
348221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$acc0,$acc0
349221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$acc0,32,$sbit
350221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$acc0,$mask,$acc0
351221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
352221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]
353221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
354221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
355221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tmp0,$car0,$car0		! ap[j]*a0+c0
356221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tmp1,$car1,$car1
357221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
358221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
359221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$acc0,$acc0
360221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$acc0,$acc0
361221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$acc0,32,$sbit
362221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$acc0,$mask,$acc0
363221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
364221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp+4]
365221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
366221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
367221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car0,$car0
368221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$car0,$car0
369221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car1,$car1
370221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp+8]
371221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car2
372221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
373221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[%sp+$bias+$frame],$tmp0	! tp[0]
374221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[%sp+$bias+$frame+4],$tmp1	! tp[1]
375221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[%sp+$bias+$frame+8],$tpj	! tp[2]
376221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+4],$mul0			! ap[1]
377221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+8],$apj			! ap[2]
378221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np],$car1			! np[0]
379221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+4],$npj			! np[1]
380221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$n0,$tmp0,$mul1
381221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
382221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$mul0,$mul0,$car0
383221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$mul1,$mask,$mul1
384221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
385221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$car1,$mul1,$car1
386221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
387221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tmp0,$car1,$car1
388221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
389221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+8],$npj			! np[2]
390221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
391221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tmp1,$car1,$car1
392221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
393221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
394221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,1,$sbit
395221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
396221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,1,$car0
397221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	12,$j
398221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[%sp+$bias+$frame]	! tp[0]=
399221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
400221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	%sp,$bias+$frame+4,$tp
401221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
402221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_2nd:
403221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$apj,$mul0,$acc0
404221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
405221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car0,$car0
406221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
407221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+$j],$apj			! ap[j]
408221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
409221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+$j],$npj			! np[j]
410221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
411221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
412221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$tp+8],$tpj			! tp[j]
413221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$acc0,$acc0
414221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$j,4,$j				! j++
415221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$acc0,$acc0
416221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$acc0,32,$sbit
417221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$acc0,$mask,$acc0
418221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp	$j,$num
419221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
420221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]			! tp[j-1]
421221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
422221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	bl	%icc,.Lsqr_2nd
423221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tp,4,$tp			! tp++
424221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_2nd
425221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
426221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$apj,$mul0,$acc0
427221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
428221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car0,$car0
429221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
430221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
431221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
432221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
433221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$acc0,$acc0
434221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$acc0,$acc0
435221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$acc0,32,$sbit
436221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$acc0,$mask,$acc0
437221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
438221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]			! tp[j-1]
439221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
440221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
441221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car0,$car0
442221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$car0,$car0
443221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car1,$car1
444221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car2,$car1,$car1
445221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp+4]
446221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car2
447221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
448221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[%sp+$bias+$frame],$tmp1	! tp[0]
449221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[%sp+$bias+$frame+4],$tpj	! tp[1]
450221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+8],$mul0			! ap[2]
451221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np],$car1			! np[0]
452221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+4],$npj			! np[1]
453221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$n0,$tmp1,$mul1
454221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$mul1,$mask,$mul1
455221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	8,$i
456221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
457221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$mul0,$mul0,$car0
458221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$car1,$mul1,$car1
459221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
460221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tmp1,$car1,$car1
461221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
462221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	%sp,$bias+$frame,$tp
463221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
464221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,1,$sbit
465221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,1,$car0
466221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	4,$j
467221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
468221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_outer:
469221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_inner1:
470221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
471221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
472221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$j,4,$j
473221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$tp+8],$tpj
474221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp	$j,$i
475221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
476221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+$j],$npj
477221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]
478221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
479221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	bl	%icc,.Lsqr_inner1
480221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tp,4,$tp
481221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_inner1
482221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
483221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$j,4,$j
484221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+$j],$apj			! ap[j]
485221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
486221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
487221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+$j],$npj			! np[j]
488221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
489221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$tp+8],$tpj			! tp[j]
490221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
491221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]
492221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
493221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
494221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$j,4,$j
495221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp	$j,$num
496221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	be,pn	%icc,.Lsqr_no_inner2
497221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tp,4,$tp
498221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
499221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_inner2:
500221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$apj,$mul0,$acc0
501221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
502221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
503221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car0,$car0
504221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+$j],$apj			! ap[j]
505221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
506221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+$j],$npj			! np[j]
507221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
508221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$acc0,$acc0
509221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$tp+8],$tpj			! tp[j]
510221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$acc0,$acc0
511221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$j,4,$j				! j++
512221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$acc0,32,$sbit
513221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$acc0,$mask,$acc0
514221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp	$j,$num
515221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
516221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
517221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]			! tp[j-1]
518221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
519221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	bl	%icc,.Lsqr_inner2
520221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tp,4,$tp			! tp++
521221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
522221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_no_inner2:
523221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$apj,$mul0,$acc0
524221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
525221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
526221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car0,$car0
527221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
528221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
529221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$acc0,$acc0
530221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$acc0,$acc0
531221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$acc0,32,$sbit
532221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$acc0,$mask,$acc0
533221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
534221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
535221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]			! tp[j-1]
536221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
537221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
538221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car0,$car0
539221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$car0,$car0
540221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car1,$car1
541221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car2,$car1,$car1
542221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp+4]
543221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car2
544221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
545221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$i,4,$i				! i++
546221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[%sp+$bias+$frame],$tmp1	! tp[0]
547221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[%sp+$bias+$frame+4],$tpj	! tp[1]
548221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$ap+$i],$mul0			! ap[j]
549221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np],$car1			! np[0]
550221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+4],$npj			! np[1]
551221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$n0,$tmp1,$mul1
552221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$mul1,$mask,$mul1
553221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$i,4,$tmp0
554221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
555221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$mul0,$mul0,$car0
556221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$car1,$mul1,$car1
557221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,$mask,$acc0
558221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tmp1,$car1,$car1
559221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,32,$car0
560221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	%sp,$bias+$frame,$tp
561221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
562221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$car0,1,$sbit
563221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car0,1,$car0
564221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
565221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp	$tmp0,$num			! i<num-1
566221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	bl	%icc,.Lsqr_outer
567221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	4,$j
568221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
569221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.Lsqr_last:
570221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
571221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
572221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$j,4,$j
573221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$tp+8],$tpj
574221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp	$j,$i
575221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
576221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld	[$np+$j],$npj
577221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]
578221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
579221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	bl	%icc,.Lsqr_last
580221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tp,4,$tp
581221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom!.Lsqr_last
582221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
583221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mulx	$npj,$mul1,$acc1
584221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tpj,$car1,$car1
585221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc0,$car1,$car1
586221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$acc1,$car1,$car1
587221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp]
588221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car1
589221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
590221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car0,$car0		! recover $car0
591221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	$sbit,$car0,$car0
592221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car0,$car1,$car1
593221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$car2,$car1,$car1
594221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st	$car1,[$tp+4]
595221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	srlx	$car1,32,$car2
596221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
597221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ba	.Ltail
598221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$tp,8,$tp
599221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.type	$fname,#function
600221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.size	$fname,(.-$fname)
601221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.asciz	"Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
602221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.align	32
603221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom___
# Post-process the generated text: every `...` span is replaced by the result
# of evaluating its contents as a Perl expression (/e runs eval($1) for each
# match, /g handles all occurrences).
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

# Emit the result.  STDOUT is buffered, so a failed write (full disk, closed
# pipe, ...) may only be reported when the handle is closed; check close()
# so a truncated output file is never mistaken for a successful run.
print $code;
close STDOUT or die "error closing STDOUT: $!";
607