# Montgomery multiplication for 32-bit MIPS, o32 calling convention.
#
# Register usage on entry (as read by the code below):
#   $4      rp   - result words are stored through this pointer
#   $5      ap   - first multiplicand, read one 32-bit word at a time
#   $6      bp   - second multiplicand, one word consumed per outer pass
#   $7      np   - modulus words
#   16($29) fifth argument: a pointer; only the word it points to is used,
#           as the multiplier that produces the per-pass reduction factor
#           (conventionally called n0 in Montgomery code)
#   20($29) sixth argument: word count, referred to as num below
#   $2      return value: 1 when the product was computed, 0 otherwise
.text

.set	noat
.set	noreorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
# Entry point: validates num and dispatches to bn_mul_mont_internal.
# Returns 0 in $2 without touching memory when num < 4 or num >= 17.
bn_mul_mont:
	lw	$8,16($29)		# fifth argument (pointer), from the caller's stack
	lw	$9,20($29)		# sixth argument: num
	slt	$1,$9,4
	bnez	$1,1f			# num < 4: not handled here
	li	$2,0			# (delay slot) return value 0 for every reject path
	slt	$1,$9,17	# on in-order CPU
	# Plain bnez rather than the deprecated branch-likely form; with a nop
	# in the delay slot the two behave identically.
	bnez	$1,bn_mul_mont_internal
	nop
1:	jr	$31
	li	$4,0			# (delay slot)
.end	bn_mul_mont

.align	5
.ent	bn_mul_mont_internal
# Worker: expects $4-$7 as above, $8 = fifth-argument pointer, $9 = num.
# Builds a scratch vector tp of num+2 words on the stack, runs num passes
# of multiply-accumulate plus reduction over it, then writes either tp or
# tp-np to rp and wipes tp. Always returns 1 in $2.
bn_mul_mont_internal:
	.frame	$30,14*4,$31
	.mask	0x40000000|16711680,-4
	sub	$29,14*4		# 14-word frame; the top nine slots hold saved registers
	sw	$30,(14-1)*4($29)
	sw	$23,(14-2)*4($29)
	sw	$22,(14-3)*4($29)
	sw	$21,(14-4)*4($29)
	sw	$20,(14-5)*4($29)
	sw	$19,(14-6)*4($29)
	sw	$18,(14-7)*4($29)
	sw	$17,(14-8)*4($29)
	sw	$16,(14-9)*4($29)
	move	$30,$29			# $30 remembers the frame so $29 can be realigned below

	.set	reorder
	lw	$8,0($8)		# dereference the fifth argument; $8 is a plain word from here on
	lw	$13,0($6)	# bp[0]
	lw	$12,0($5)	# ap[0]
	lw	$14,0($7)	# np[0]

	sub	$29,2*4		# place for two extra words
	sll	$9,2			# num is kept as a byte count from now on
	li	$1,-4096
	sub	$29,$9			# reserve num words for tp
	and	$29,$1			# round $29 down to a 4096-byte boundary; tp = $29

	# First pass (bp[0]): tp is not read yet, only written.
	multu	$12,$13			# ap[0]*bp[0]
	lw	$16,4($5)		# ap[1]
	lw	$18,4($7)		# np[1]
	mflo	$10
	mfhi	$11
	multu	$10,$8
	mflo	$23			# $23 = low word of (low word of ap[0]*bp[0]) * $8

	multu	$16,$13			# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	multu	$14,$23			# np[0]*$23
	mflo	$24
	mfhi	$25
	multu	$18,$23			# np[1]*$23
	addu	$24,$10
	sltu	$1,$24,$10		# carry out of the low-word addition
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# $15 walks tp
	li	$22,2*4			# $22 = byte offset of word index 2
.align	4
.L1st:
	.set	noreorder
	add	$12,$5,$22
	add	$14,$7,$22
	lw	$12,($12)		# ap[j]
	lw	$14,($14)		# np[j]

	multu	$12,$13			# ap[j]*bp[0]
	addu	$10,$16,$11
	addu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	addu	$11,$17,$1
	addu	$25,$19,$2
	mflo	$16
	mfhi	$17

	addu	$24,$10
	sltu	$1,$24,$10
	multu	$14,$23			# np[j]*$23
	addu	$25,$1
	addu	$22,4
	sw	$24,($15)		# store the next tp word
	sltu	$2,$22,$9		# more words left?
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	add	$15,4			# (delay slot) advance the tp pointer
	.set	reorder

	# Drain the products still in flight and store the top of tp.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1

	addu	$24,$18,$25
	sltu	$2,$24,$25
	addu	$25,$19,$2
	addu	$24,$10
	sltu	$1,$24,$10
	addu	$25,$1

	sw	$24,($15)

	addu	$25,$11
	sltu	$1,$25,$11
	sw	$25,4($15)
	sw	$1,2*4($15)		# final carry goes into the extra word

	li	$21,4			# $21 = byte offset into bp, starting at bp[1]
.align	4
.Louter:
	# Remaining passes: same as the first one, but the previous tp contents
	# are added in as well.
	add	$13,$6,$21
	lw	$13,($13)		# bp[i]
	lw	$12,($5)		# ap[0]
	lw	$16,4($5)		# ap[1]
	lw	$20,($29)		# tp[0]

	multu	$12,$13			# ap[0]*bp[i]
	lw	$14,($7)		# np[0]
	lw	$18,4($7)		# np[1]
	mflo	$10
	mfhi	$11
	addu	$10,$20			# + tp[0]
	multu	$10,$8
	sltu	$1,$10,$20
	addu	$11,$1
	mflo	$23			# reduction factor for this pass

	multu	$16,$13			# ap[1]*bp[i]
	mflo	$16
	mfhi	$17

	multu	$14,$23			# np[0]*$23
	mflo	$24
	mfhi	$25

	multu	$18,$23			# np[1]*$23
	addu	$24,$10
	sltu	$1,$24,$10
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29
	li	$22,2*4
	lw	$20,4($15)		# tp[1]
.align	4
.Linner:
	.set	noreorder
	add	$12,$5,$22
	add	$14,$7,$22
	lw	$12,($12)		# ap[j]
	lw	$14,($14)		# np[j]

	multu	$12,$13			# ap[j]*bp[i]
	addu	$10,$16,$11
	addu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	addu	$11,$17,$1
	addu	$25,$19,$2
	mflo	$16
	mfhi	$17

	addu	$10,$20			# add the previously stored tp word
	addu	$22,4
	multu	$14,$23			# np[j]*$23
	sltu	$1,$10,$20
	addu	$24,$10
	addu	$11,$1
	sltu	$2,$24,$10
	lw	$20,2*4($15)		# fetch the tp word needed by the next iteration
	addu	$25,$2
	sltu	$1,$22,$9		# more words left?
	mflo	$18
	mfhi	$19
	sw	$24,($15)
	bnez	$1,.Linner
	add	$15,4			# (delay slot) advance the tp pointer
	.set	reorder

	# Drain the products still in flight and store the top of tp.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1
	addu	$10,$20
	sltu	$2,$10,$20
	addu	$11,$2

	lw	$20,2*4($15)		# carry word left by the previous pass
	addu	$24,$18,$25
	sltu	$1,$24,$25
	addu	$25,$19,$1
	addu	$24,$10
	sltu	$2,$24,$10
	addu	$25,$2
	sw	$24,($15)

	addu	$24,$25,$11
	sltu	$25,$24,$11
	addu	$24,$20
	sltu	$1,$24,$20
	addu	$25,$1
	sw	$24,4($15)
	sw	$25,2*4($15)		# $25 (top carry) is consumed again after .Lsub

	addu	$21,4
	sltu	$2,$21,$9
	bnez	$2,.Louter		# next word of bp

	.set	noreorder
	add	$20,$29,$9	# &tp[num]
	move	$15,$29
	move	$5,$29
	li	$11,0		# clear borrow bit

.align	4
.Lsub:	lw	$10,($15)		# rp[] = tp[] - np[], with $11 as the running borrow
	lw	$24,($7)
	add	$15,4
	add	$7,4
	subu	$24,$10,$24	# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow produced by the subtraction itself
	subu	$10,$24,$11
	sgtu	$11,$10,$24		# borrow produced by subtracting the incoming borrow
	sw	$10,($4)
	or	$11,$1
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	add	$4,4			# (delay slot)

	subu	$11,$25,$11	# handle upmost overflow bit
	move	$15,$29
	sub	$4,$9		# restore rp
	not	$25,$11

	# $11 is used as an AND-mask and $25 as its complement: the source
	# pointer is picked with and/or rather than a branch.
	and	$5,$11,$29
	and	$6,$25,$4
	or	$5,$5,$6	# ap=borrow?tp:rp

.align	4
.Lcopy:	lw	$12,($5)		# copy the selected source to rp ...
	add	$5,4
	sw	$0,($15)		# ... and zero tp behind the read
	add	$15,4
	sltu	$1,$15,$20
	sw	$12,($4)
	bnez	$1,.Lcopy
	add	$4,4			# (delay slot)

	li	$4,1
	li	$2,1			# return value 1

	.set	noreorder
	move	$29,$30			# drop tp and return to the 14-word frame
	lw	$30,(14-1)*4($29)
	lw	$23,(14-2)*4($29)
	lw	$22,(14-3)*4($29)
	lw	$21,(14-4)*4($29)
	lw	$20,(14-5)*4($29)
	lw	$19,(14-6)*4($29)
	lw	$18,(14-7)*4($29)
	lw	$17,(14-8)*4($29)
	lw	$16,(14-9)*4($29)
	jr	$31
	add	$29,14*4		# (delay slot) release the frame
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"