# Assembler mode for the whole file.
#   noat      - $1 ($at) is used explicitly as a scratch register below, so
#               the assembler must not use it for macro expansion.
#   noreorder - branch delay slots in the entry stub are filled by hand.
.text

.set	noat
.set	noreorder

# bn_mul_mont - public entry point / size filter.
#
# Reads two arguments from the caller's stack:
#   16($29) -> $8   pointer that bn_mul_mont_internal dereferences as n0
#   20($29) -> $9   num, the operand length in 32-bit words
# $4..$7 are not modified here and are handed on to bn_mul_mont_internal
# (its comments name them rp, ap, bp, np).
# NOTE(review): presumably the C prototype is
#   int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the caller.
#
# Result:
#   num < 4 or num >= 17 : returns immediately with $2 = 0 and $4 = 0,
#                          nothing is written through rp.
#   4 <= num <= 16       : control transfers to bn_mul_mont_internal,
#                          which returns with $2 = 1.
#
# Assembled under ".set noreorder": the instruction after every branch is
# its delay slot and executes whether or not the branch is taken.
.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	lw	$8,16($29)
	lw	$9,20($29)
	slt	$1,$9,4		# $1 = (num < 4)
	bnez	$1,1f
	li	$2,0		# delay slot: preset return value to 0
	slt	$1,$9,17	# on in-order CPU
	# Plain bnez instead of branch-likely: the delay slot is a nop, so the
	# behaviour is the same, and branch-likely is deprecated / not present
	# on MIPS32 Release 6.
	bnez	$1,bn_mul_mont_internal
	nop
1:	jr	$31
	li	$4,0		# delay slot
.end	bn_mul_mont

# bn_mul_mont_internal - word-by-word Montgomery multiplication, 32-bit words.
#
# Reached only from bn_mul_mont with 4 <= num <= 16.
#
# Register use on entry:
#   $4   rp   result pointer (num words are written)
#   $5   ap   first operand
#   $6   bp   second operand
#   $7   np   modulus
#   $8   pointer to the n0 word (replaced by the word itself below)
#   $9   num in words (converted to a byte count below)
#
# Working registers:
#   $10/$11  low word / carry of the running ap[j]*bp[i] (+tp[j]) sum
#   $24/$25  low word / carry of the running np[j]*m sum
#   $12,$14  ap[j], np[j] just loaded
#   $13      bp[i]
#   $15      cursor into the temporary vector tp
#   $16/$17  LO/HI of the most recent ap[j]*bp[i] product
#   $18/$19  LO/HI of the most recent np[j]*m product
#   $20      tp[j] being folded in; later &tp[num]
#   $21      i, byte offset into bp
#   $22      j, byte offset into ap / np
#   $23      m = (low word of current sum) * n0
#   $1,$2    carry flags produced by sltu / sgtu
#   $30      frame pointer (value of $29 after the register save area)
#
# tp lives on the stack at $29 and holds num+2 words.
# Returns 1 in $2 (and $4). $16-$23 and $30 are saved and restored.
.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*4,$31
	.mask	0x40000000|16711680,-4
	# Prologue: 14-word frame; $30 and $23..$16 go in the top nine slots.
	sub $29,14*4
	sw	$30,(14-1)*4($29)
	sw	$23,(14-2)*4($29)
	sw	$22,(14-3)*4($29)
	sw	$21,(14-4)*4($29)
	sw	$20,(14-5)*4($29)
	sw	$19,(14-6)*4($29)
	sw	$18,(14-7)*4($29)
	sw	$17,(14-8)*4($29)
	sw	$16,(14-9)*4($29)
	move	$30,$29		# remember frame base; $29 is re-aligned below

	.set	reorder
	lw	$8,0($8)	# n0 word itself
	lw	$13,0($6)	# bp[0]
	lw	$12,0($5)	# ap[0]
	lw	$14,0($7)	# np[0]

	# Carve tp[] out of the stack: num words plus two extra, then round
	# the stack pointer down to a 4096-byte boundary.
	sub $29,2*4	# place for two extra words
	sll	$9,2		# num is a byte count from here on
	li	$1,-4096
	sub $29,$9
	and	$29,$1

	# ---- first pass (i = 0): tp = (ap*bp[0] + m*np) >> 32 ----
	multu	$12,$13		# ap[0]*bp[0]
	lw	$16,4($5)	# ap[1]
	lw	$18,4($7)	# np[1]
	mflo	$10
	mfhi	$11
	multu	$10,$8
	mflo	$23		# m = low(ap[0]*bp[0]) * n0

	multu	$16,$13		# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	multu	$14,$23		# np[0]*m
	mflo	$24
	mfhi	$25
	multu	$18,$23		# np[1]*m, overlapped with the carry fix-up
	addu	$24,$10		# low word of the sum is discarded, only
	sltu	$1,$24,$10	# its carry is propagated
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29		# tp cursor = &tp[0]
	li	$22,2*4		# j = 2 words
.align	4
.L1st:
	.set	noreorder
	add $12,$5,$22
	add $14,$7,$22
	lw	$12,($12)	# ap[j]
	lw	$14,($14)	# np[j]

	multu	$12,$13		# start ap[j]*bp[0] while finishing word j-1
	addu	$10,$16,$11
	addu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	addu	$11,$17,$1
	addu	$25,$19,$2
	mflo	$16
	mfhi	$17

	addu	$24,$10
	sltu	$1,$24,$10
	multu	$14,$23		# start np[j]*m
	addu	$25,$1
	addu	$22,4		# j++
	sw	$24,($15)	# tp[j-2] = combined low word
	sltu	$2,$22,$9	# j < num ?
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	add $15,4		# delay slot: advance tp cursor every iteration
	.set	reorder

	# Tail of the first pass: fold in the last pending products.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1

	addu	$24,$18,$25
	sltu	$2,$24,$25
	addu	$25,$19,$2
	addu	$24,$10
	sltu	$1,$24,$10
	addu	$25,$1

	sw	$24,($15)

	addu	$25,$11
	sltu	$1,$25,$11
	sw	$25,4($15)
	sw	$1,2*4($15)	# top carry word

	# ---- remaining passes: i = 1 .. num-1 ----
	li	$21,4		# i = 1 word
.align	4
.Louter:
	add $13,$6,$21
	lw	$13,($13)	# bp[i]
	lw	$12,($5)	# ap[0]
	lw	$16,4($5)	# ap[1]
	lw	$20,($29)	# tp[0]

	multu	$12,$13		# ap[0]*bp[i]
	lw	$14,($7)	# np[0]
	lw	$18,4($7)	# np[1]
	mflo	$10
	mfhi	$11
	addu	$10,$20		# + tp[0]
	multu	$10,$8
	sltu	$1,$10,$20
	addu	$11,$1
	mflo	$23		# m = low(ap[0]*bp[i] + tp[0]) * n0

	multu	$16,$13		# ap[1]*bp[i]
	mflo	$16
	mfhi	$17

	multu	$14,$23		# np[0]*m
	mflo	$24
	mfhi	$25

	multu	$18,$23		# np[1]*m
	addu	$24,$10
	sltu	$1,$24,$10
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29		# tp cursor = &tp[0]
	li	$22,2*4		# j = 2 words
	lw	$20,4($15)	# tp[1]
.align	4
.Linner:
	.set	noreorder
	add $12,$5,$22
	add $14,$7,$22
	lw	$12,($12)	# ap[j]
	lw	$14,($14)	# np[j]

	multu	$12,$13		# start ap[j]*bp[i] while finishing word j-1
	addu	$10,$16,$11
	addu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	addu	$11,$17,$1
	addu	$25,$19,$2
	mflo	$16
	mfhi	$17

	addu	$10,$20		# + previously stored tp word
	addu	$22,4		# j++
	multu	$14,$23		# start np[j]*m
	sltu	$1,$10,$20
	addu	$24,$10
	addu	$11,$1
	sltu	$2,$24,$10
	lw	$20,2*4($15)	# next tp word, read before it is overwritten
	addu	$25,$2
	sltu	$1,$22,$9	# j < num ?
	mflo	$18
	mfhi	$19
	sw	$24,($15)	# tp[j-2] = combined low word
	bnez	$1,.Linner
	add $15,4		# delay slot: advance tp cursor every iteration
	.set	reorder

	# Tail of this pass: last pending products plus the two top tp words.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1
	addu	$10,$20
	sltu	$2,$10,$20
	addu	$11,$2

	lw	$20,2*4($15)	# previous top carry word
	addu	$24,$18,$25
	sltu	$1,$24,$25
	addu	$25,$19,$1
	addu	$24,$10
	sltu	$2,$24,$10
	addu	$25,$2
	sw	$24,($15)

	addu	$24,$25,$11
	sltu	$25,$24,$11
	addu	$24,$20
	sltu	$1,$24,$20
	addu	$25,$1
	sw	$24,4($15)
	sw	$25,2*4($15)	# new top carry word (also left in $25)

	addu	$21,4		# i++
	sltu	$2,$21,$9
	bnez	$2,.Louter

	# ---- final reduction: rp = tp - np, then choose tp or rp ----
	.set	noreorder
	add $20,$29,$9	# &tp[num]
	move	$15,$29
	move	$5,$29
	li	$11,0		# clear borrow bit

.align	4
.Lsub:	lw	$10,($15)
	lw	$24,($7)
	add $15,4
	add $7,4
	subu	$24,$10,$24	# tp[i]-np[i]
	sgtu	$1,$24,$10	# borrow out of tp[i]-np[i]
	subu	$10,$24,$11	# minus incoming borrow
	sgtu	$11,$10,$24	# borrow out of that step
	sw	$10,($4)	# rp[i] = difference
	or	$11,$1		# combined outgoing borrow
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	add $4,4		# delay slot: advance rp every iteration

	subu	$11,$25,$11	# handle upmost overflow bit
	move	$15,$29
	sub $4,$9	# restore rp
	not	$25,$11

	# Pick the copy source with AND/OR masks instead of a branch:
	# $11 is used as a mask for tp, its complement as a mask for rp.
	and	$5,$11,$29
	and	$6,$25,$4
	or	$5,$5,$6	# ap=borrow?tp:rp

	# Copy the selected vector into rp and zero tp[0..num-1] as we go.
.align	4
.Lcopy:	lw	$12,($5)
	add $5,4
	sw	$0,($15)	# wipe tp word
	add $15,4
	sltu	$1,$15,$20
	sw	$12,($4)
	bnez	$1,.Lcopy
	add $4,4		# delay slot: advance rp every iteration

	li	$4,1
	li	$2,1		# return value 1

	# Epilogue: drop tp, restore saved registers, release the frame.
	.set	noreorder
	move	$29,$30
	lw	$30,(14-1)*4($29)
	lw	$23,(14-2)*4($29)
	lw	$22,(14-3)*4($29)
	lw	$21,(14-4)*4($29)
	lw	$20,(14-5)*4($29)
	lw	$19,(14-6)*4($29)
	lw	$18,(14-7)*4($29)
	lw	$17,(14-8)*4($29)
	lw	$16,(14-9)*4($29)
	jr	$31
	add $29,14*4		# delay slot: pop the frame
.end	bn_mul_mont_internal
# Identification string emitted into read-only data.
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"
