#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
# forms are granted according to the OpenSSL license.
# ====================================================================
#
# Version 1.1
#
# The major reason for undertaking this effort was to mitigate the
# hazard of cache-timing attacks. This is [currently and initially!]
# addressed in two ways. 1. S-boxes are compressed from 5KB to 2KB+256B
# size each. 2. References to them are scheduled for L2 cache latency,
# meaning that the tables don't have to reside in L1 cache. Once again,
# this is an initial draft and one should expect more countermeasures
# to be implemented...
#
# Version 1.1 prefetches T[ed]4 in order to mitigate attacks on the
# last round.
#
# Even though performance was not the primary goal [on the contrary,
# extra shifts "induced" by the compressed S-box and the longer loop
# epilogue "induced" by scheduling for L2 have a negative effect on
# performance], the code turned out to run in ~23 cycles per processed
# byte en-/decrypted with a 128-bit key. This is a pretty good result
# for code with the mentioned qualities on an UltraSPARC core. Compared
# to Sun C generated code my encrypt procedure runs just a few percent
# faster, while the decrypt one is a whole 50% faster [yes, Sun C
# failed to generate optimal decrypt procedure].
# Compared to GNU C generated code both
# procedures are more than 60% faster:-)

# Target ABI selection: default to 32-bit, switch to 64-bit when the
# compiler flags passed on the command line contain -m64 or -xarch=v9.
$bits=32;
for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
# $bias is added to %sp in every stack reference and $frame is the size
# handed to "save"; the 64-bit build uses a stack bias of 2047 and a
# larger frame.
if ($bits==64)	{ $bias=2047; $frame=192; }
else		{ $bias=0;    $frame=112; }
# Extra stack bytes reserved by the core routines; the return address is
# parked in this area while %i7 is reused as $rounds.
$locals=16;

# Sixteen registers receiving the table look-ups of one round
# (four per output word).
$acc0="%l0";
$acc1="%o0";
$acc2="%o1";
$acc3="%o2";

$acc4="%l1";
$acc5="%o3";
$acc6="%o4";
$acc7="%o5";

$acc8="%l2";
$acc9="%o7";
$acc10="%g1";
$acc11="%g2";

$acc12="%l3";
$acc13="%g3";
$acc14="%g4";
$acc15="%g5";

# $t0-$t3 and $s0-$s3 hold the state alternately: each loop iteration
# processes two rounds, going $s -> $t -> $s.
$t0="%l4";
$t1="%l5";
$t2="%l6";
$t3="%l7";

$s0="%i0";
$s1="%i1";
$s2="%i2";
$s3="%i3";
$tbl="%i4";	# base address of AES_Te / AES_Td
$key="%i5";	# key schedule pointer, advanced by 32 per loop iteration
$rounds="%i7";	# aliases with return address, which is off-loaded to stack

# Append one ".long w,w" line to $code for every 32-bit word passed in.
# Each word is stored twice, giving 8-byte table entries: the round code
# fetches them with 64-bit ldx loads at offsets masked with 2040 and
# shifts the result right by 8, 16 or 24 bits (srlx).
#
# The sub used to carry an empty prototype "()" although it is always
# called with a long argument list; that only worked because the
# "&name(...)" call form skips prototype checking.  Without the
# prototype both "&_data_word(...)" and "_data_word(...)" are valid.
sub _data_word
{
    foreach my $word (@_) {
	$code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
    }
}

# The 64-bit build declares %g2 and %g3 as scratch registers up front.
$code.=<<___ if ($bits==64);
.register	%g2,#scratch
.register	%g3,#scratch
___
$code.=<<___;
.section	".text",#alloc,#execinstr

.align	256
AES_Te:
___
_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661,
0x7db3b3ce, 107 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 108 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 109 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 110 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 111 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 112 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 113 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 114 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 115 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 116 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 117 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 118 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 119 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 120 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 121 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 122 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 123 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 124 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 125 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 126 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 127 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 128 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 129 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 130 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 131 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 132 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 133 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 134 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 135 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 136 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 137 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 138 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 139 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 140 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 141 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 142 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 143 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 144 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 145 
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# The 256 words passed to _data_word are each emitted as two .long
# values, so the table built so far is 2048 bytes long.  The byte table
# that opens the next chunk therefore sits at AES_Te+2048: that is the
# address the encrypt code touches in its "prefetch te4" loads and uses
# as the base for the final-round ldub look-ups.
$code.=<<___;
	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte	0xe1, 0xf8,
0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 182 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 183 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 184 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 185.type AES_Te,#object 186.size AES_Te,(.-AES_Te) 187 188.align 64 189.skip 16 190_sparcv9_AES_encrypt: 191 save %sp,-$frame-$locals,%sp 192 stx %i7,[%sp+$bias+$frame+0] ! off-load return address 193 ld [$key+240],$rounds 194 ld [$key+0],$t0 195 ld [$key+4],$t1 ! 196 ld [$key+8],$t2 197 srl $rounds,1,$rounds 198 xor $t0,$s0,$s0 199 ld [$key+12],$t3 200 srl $s0,21,$acc0 201 xor $t1,$s1,$s1 202 ld [$key+16],$t0 203 srl $s1,13,$acc1 ! 204 xor $t2,$s2,$s2 205 ld [$key+20],$t1 206 xor $t3,$s3,$s3 207 ld [$key+24],$t2 208 and $acc0,2040,$acc0 209 ld [$key+28],$t3 210 nop 211.Lenc_loop: 212 srl $s2,5,$acc2 ! 213 and $acc1,2040,$acc1 214 ldx [$tbl+$acc0],$acc0 215 sll $s3,3,$acc3 216 and $acc2,2040,$acc2 217 ldx [$tbl+$acc1],$acc1 218 srl $s1,21,$acc4 219 and $acc3,2040,$acc3 220 ldx [$tbl+$acc2],$acc2 ! 221 srl $s2,13,$acc5 222 and $acc4,2040,$acc4 223 ldx [$tbl+$acc3],$acc3 224 srl $s3,5,$acc6 225 and $acc5,2040,$acc5 226 ldx [$tbl+$acc4],$acc4 227 fmovs %f0,%f0 228 sll $s0,3,$acc7 ! 229 and $acc6,2040,$acc6 230 ldx [$tbl+$acc5],$acc5 231 srl $s2,21,$acc8 232 and $acc7,2040,$acc7 233 ldx [$tbl+$acc6],$acc6 234 srl $s3,13,$acc9 235 and $acc8,2040,$acc8 236 ldx [$tbl+$acc7],$acc7 ! 237 srl $s0,5,$acc10 238 and $acc9,2040,$acc9 239 ldx [$tbl+$acc8],$acc8 240 sll $s1,3,$acc11 241 and $acc10,2040,$acc10 242 ldx [$tbl+$acc9],$acc9 243 fmovs %f0,%f0 244 srl $s3,21,$acc12 ! 245 and $acc11,2040,$acc11 246 ldx [$tbl+$acc10],$acc10 247 srl $s0,13,$acc13 248 and $acc12,2040,$acc12 249 ldx [$tbl+$acc11],$acc11 250 srl $s1,5,$acc14 251 and $acc13,2040,$acc13 252 ldx [$tbl+$acc12],$acc12 ! 
253 sll $s2,3,$acc15 254 and $acc14,2040,$acc14 255 ldx [$tbl+$acc13],$acc13 256 and $acc15,2040,$acc15 257 add $key,32,$key 258 ldx [$tbl+$acc14],$acc14 259 fmovs %f0,%f0 260 subcc $rounds,1,$rounds ! 261 ldx [$tbl+$acc15],$acc15 262 bz,a,pn %icc,.Lenc_last 263 add $tbl,2048,$rounds 264 265 srlx $acc1,8,$acc1 266 xor $acc0,$t0,$t0 267 ld [$key+0],$s0 268 fmovs %f0,%f0 269 srlx $acc2,16,$acc2 ! 270 xor $acc1,$t0,$t0 271 ld [$key+4],$s1 272 srlx $acc3,24,$acc3 273 xor $acc2,$t0,$t0 274 ld [$key+8],$s2 275 srlx $acc5,8,$acc5 276 xor $acc3,$t0,$t0 277 ld [$key+12],$s3 ! 278 srlx $acc6,16,$acc6 279 xor $acc4,$t1,$t1 280 fmovs %f0,%f0 281 srlx $acc7,24,$acc7 282 xor $acc5,$t1,$t1 283 srlx $acc9,8,$acc9 284 xor $acc6,$t1,$t1 285 srlx $acc10,16,$acc10 ! 286 xor $acc7,$t1,$t1 287 srlx $acc11,24,$acc11 288 xor $acc8,$t2,$t2 289 srlx $acc13,8,$acc13 290 xor $acc9,$t2,$t2 291 srlx $acc14,16,$acc14 292 xor $acc10,$t2,$t2 293 srlx $acc15,24,$acc15 ! 294 xor $acc11,$t2,$t2 295 xor $acc12,$acc14,$acc14 296 xor $acc13,$t3,$t3 297 srl $t0,21,$acc0 298 xor $acc14,$t3,$t3 299 srl $t1,13,$acc1 300 xor $acc15,$t3,$t3 301 302 and $acc0,2040,$acc0 ! 303 srl $t2,5,$acc2 304 and $acc1,2040,$acc1 305 ldx [$tbl+$acc0],$acc0 306 sll $t3,3,$acc3 307 and $acc2,2040,$acc2 308 ldx [$tbl+$acc1],$acc1 309 fmovs %f0,%f0 310 srl $t1,21,$acc4 ! 311 and $acc3,2040,$acc3 312 ldx [$tbl+$acc2],$acc2 313 srl $t2,13,$acc5 314 and $acc4,2040,$acc4 315 ldx [$tbl+$acc3],$acc3 316 srl $t3,5,$acc6 317 and $acc5,2040,$acc5 318 ldx [$tbl+$acc4],$acc4 ! 319 sll $t0,3,$acc7 320 and $acc6,2040,$acc6 321 ldx [$tbl+$acc5],$acc5 322 srl $t2,21,$acc8 323 and $acc7,2040,$acc7 324 ldx [$tbl+$acc6],$acc6 325 fmovs %f0,%f0 326 srl $t3,13,$acc9 ! 327 and $acc8,2040,$acc8 328 ldx [$tbl+$acc7],$acc7 329 srl $t0,5,$acc10 330 and $acc9,2040,$acc9 331 ldx [$tbl+$acc8],$acc8 332 sll $t1,3,$acc11 333 and $acc10,2040,$acc10 334 ldx [$tbl+$acc9],$acc9 ! 
335 srl $t3,21,$acc12 336 and $acc11,2040,$acc11 337 ldx [$tbl+$acc10],$acc10 338 srl $t0,13,$acc13 339 and $acc12,2040,$acc12 340 ldx [$tbl+$acc11],$acc11 341 fmovs %f0,%f0 342 srl $t1,5,$acc14 ! 343 and $acc13,2040,$acc13 344 ldx [$tbl+$acc12],$acc12 345 sll $t2,3,$acc15 346 and $acc14,2040,$acc14 347 ldx [$tbl+$acc13],$acc13 348 srlx $acc1,8,$acc1 349 and $acc15,2040,$acc15 350 ldx [$tbl+$acc14],$acc14 ! 351 352 srlx $acc2,16,$acc2 353 xor $acc0,$s0,$s0 354 ldx [$tbl+$acc15],$acc15 355 srlx $acc3,24,$acc3 356 xor $acc1,$s0,$s0 357 ld [$key+16],$t0 358 fmovs %f0,%f0 359 srlx $acc5,8,$acc5 ! 360 xor $acc2,$s0,$s0 361 ld [$key+20],$t1 362 srlx $acc6,16,$acc6 363 xor $acc3,$s0,$s0 364 ld [$key+24],$t2 365 srlx $acc7,24,$acc7 366 xor $acc4,$s1,$s1 367 ld [$key+28],$t3 ! 368 srlx $acc9,8,$acc9 369 xor $acc5,$s1,$s1 370 ldx [$tbl+2048+0],%g0 ! prefetch te4 371 srlx $acc10,16,$acc10 372 xor $acc6,$s1,$s1 373 ldx [$tbl+2048+32],%g0 ! prefetch te4 374 srlx $acc11,24,$acc11 375 xor $acc7,$s1,$s1 376 ldx [$tbl+2048+64],%g0 ! prefetch te4 377 srlx $acc13,8,$acc13 378 xor $acc8,$s2,$s2 379 ldx [$tbl+2048+96],%g0 ! prefetch te4 380 srlx $acc14,16,$acc14 ! 381 xor $acc9,$s2,$s2 382 ldx [$tbl+2048+128],%g0 ! prefetch te4 383 srlx $acc15,24,$acc15 384 xor $acc10,$s2,$s2 385 ldx [$tbl+2048+160],%g0 ! prefetch te4 386 srl $s0,21,$acc0 387 xor $acc11,$s2,$s2 388 ldx [$tbl+2048+192],%g0 ! prefetch te4 389 xor $acc12,$acc14,$acc14 390 xor $acc13,$s3,$s3 391 ldx [$tbl+2048+224],%g0 ! prefetch te4 392 srl $s1,13,$acc1 ! 393 xor $acc14,$s3,$s3 394 xor $acc15,$s3,$s3 395 ba .Lenc_loop 396 and $acc0,2040,$acc0 397 398.align 32 399.Lenc_last: 400 srlx $acc1,8,$acc1 ! 401 xor $acc0,$t0,$t0 402 ld [$key+0],$s0 403 srlx $acc2,16,$acc2 404 xor $acc1,$t0,$t0 405 ld [$key+4],$s1 406 srlx $acc3,24,$acc3 407 xor $acc2,$t0,$t0 408 ld [$key+8],$s2 ! 
409 srlx $acc5,8,$acc5 410 xor $acc3,$t0,$t0 411 ld [$key+12],$s3 412 srlx $acc6,16,$acc6 413 xor $acc4,$t1,$t1 414 srlx $acc7,24,$acc7 415 xor $acc5,$t1,$t1 416 srlx $acc9,8,$acc9 ! 417 xor $acc6,$t1,$t1 418 srlx $acc10,16,$acc10 419 xor $acc7,$t1,$t1 420 srlx $acc11,24,$acc11 421 xor $acc8,$t2,$t2 422 srlx $acc13,8,$acc13 423 xor $acc9,$t2,$t2 424 srlx $acc14,16,$acc14 ! 425 xor $acc10,$t2,$t2 426 srlx $acc15,24,$acc15 427 xor $acc11,$t2,$t2 428 xor $acc12,$acc14,$acc14 429 xor $acc13,$t3,$t3 430 srl $t0,24,$acc0 431 xor $acc14,$t3,$t3 432 srl $t1,16,$acc1 ! 433 xor $acc15,$t3,$t3 434 435 srl $t2,8,$acc2 436 and $acc1,255,$acc1 437 ldub [$rounds+$acc0],$acc0 438 srl $t1,24,$acc4 439 and $acc2,255,$acc2 440 ldub [$rounds+$acc1],$acc1 441 srl $t2,16,$acc5 ! 442 and $t3,255,$acc3 443 ldub [$rounds+$acc2],$acc2 444 ldub [$rounds+$acc3],$acc3 445 srl $t3,8,$acc6 446 and $acc5,255,$acc5 447 ldub [$rounds+$acc4],$acc4 448 fmovs %f0,%f0 449 srl $t2,24,$acc8 ! 450 and $acc6,255,$acc6 451 ldub [$rounds+$acc5],$acc5 452 srl $t3,16,$acc9 453 and $t0,255,$acc7 454 ldub [$rounds+$acc6],$acc6 455 ldub [$rounds+$acc7],$acc7 456 fmovs %f0,%f0 457 srl $t0,8,$acc10 ! 458 and $acc9,255,$acc9 459 ldub [$rounds+$acc8],$acc8 460 srl $t3,24,$acc12 461 and $acc10,255,$acc10 462 ldub [$rounds+$acc9],$acc9 463 srl $t0,16,$acc13 464 and $t1,255,$acc11 465 ldub [$rounds+$acc10],$acc10 ! 466 srl $t1,8,$acc14 467 and $acc13,255,$acc13 468 ldub [$rounds+$acc11],$acc11 469 ldub [$rounds+$acc12],$acc12 470 and $acc14,255,$acc14 471 ldub [$rounds+$acc13],$acc13 472 and $t2,255,$acc15 473 ldub [$rounds+$acc14],$acc14 ! 474 475 sll $acc0,24,$acc0 476 xor $acc3,$s0,$s0 477 ldub [$rounds+$acc15],$acc15 478 sll $acc1,16,$acc1 479 xor $acc0,$s0,$s0 480 ldx [%sp+$bias+$frame+0],%i7 ! restore return address 481 fmovs %f0,%f0 482 sll $acc2,8,$acc2 ! 
483 xor $acc1,$s0,$s0 484 sll $acc4,24,$acc4 485 xor $acc2,$s0,$s0 486 sll $acc5,16,$acc5 487 xor $acc7,$s1,$s1 488 sll $acc6,8,$acc6 489 xor $acc4,$s1,$s1 490 sll $acc8,24,$acc8 ! 491 xor $acc5,$s1,$s1 492 sll $acc9,16,$acc9 493 xor $acc11,$s2,$s2 494 sll $acc10,8,$acc10 495 xor $acc6,$s1,$s1 496 sll $acc12,24,$acc12 497 xor $acc8,$s2,$s2 498 sll $acc13,16,$acc13 ! 499 xor $acc9,$s2,$s2 500 sll $acc14,8,$acc14 501 xor $acc10,$s2,$s2 502 xor $acc12,$acc14,$acc14 503 xor $acc13,$s3,$s3 504 xor $acc14,$s3,$s3 505 xor $acc15,$s3,$s3 506 507 ret 508 restore 509.type _sparcv9_AES_encrypt,#function 510.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt) 511 512.align 32 513.globl AES_encrypt 514AES_encrypt: 515 or %o0,%o1,%g1 516 andcc %g1,3,%g0 517 bnz,pn %xcc,.Lunaligned_enc 518 save %sp,-$frame,%sp 519 520 ld [%i0+0],%o0 521 ld [%i0+4],%o1 522 ld [%i0+8],%o2 523 ld [%i0+12],%o3 524 5251: call .+8 526 add %o7,AES_Te-1b,%o4 527 call _sparcv9_AES_encrypt 528 mov %i2,%o5 529 530 st %o0,[%i1+0] 531 st %o1,[%i1+4] 532 st %o2,[%i1+8] 533 st %o3,[%i1+12] 534 535 ret 536 restore 537 538.align 32 539.Lunaligned_enc: 540 ldub [%i0+0],%l0 541 ldub [%i0+1],%l1 542 ldub [%i0+2],%l2 543 544 sll %l0,24,%l0 545 ldub [%i0+3],%l3 546 sll %l1,16,%l1 547 ldub [%i0+4],%l4 548 sll %l2,8,%l2 549 or %l1,%l0,%l0 550 ldub [%i0+5],%l5 551 sll %l4,24,%l4 552 or %l3,%l2,%l2 553 ldub [%i0+6],%l6 554 sll %l5,16,%l5 555 or %l0,%l2,%o0 556 ldub [%i0+7],%l7 557 558 sll %l6,8,%l6 559 or %l5,%l4,%l4 560 ldub [%i0+8],%l0 561 or %l7,%l6,%l6 562 ldub [%i0+9],%l1 563 or %l4,%l6,%o1 564 ldub [%i0+10],%l2 565 566 sll %l0,24,%l0 567 ldub [%i0+11],%l3 568 sll %l1,16,%l1 569 ldub [%i0+12],%l4 570 sll %l2,8,%l2 571 or %l1,%l0,%l0 572 ldub [%i0+13],%l5 573 sll %l4,24,%l4 574 or %l3,%l2,%l2 575 ldub [%i0+14],%l6 576 sll %l5,16,%l5 577 or %l0,%l2,%o2 578 ldub [%i0+15],%l7 579 580 sll %l6,8,%l6 581 or %l5,%l4,%l4 582 or %l7,%l6,%l6 583 or %l4,%l6,%o3 584 5851: call .+8 586 add %o7,AES_Te-1b,%o4 587 call 
_sparcv9_AES_encrypt
	mov	%i2,%o5

	srl	%o0,24,%l0
	srl	%o0,16,%l1
	stb	%l0,[%i1+0]
	srl	%o0,8,%l2
	stb	%l1,[%i1+1]
	stb	%l2,[%i1+2]
	srl	%o1,24,%l4
	stb	%o0,[%i1+3]

	srl	%o1,16,%l5
	stb	%l4,[%i1+4]
	srl	%o1,8,%l6
	stb	%l5,[%i1+5]
	stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]

	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]

	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_encrypt,#function
.size	AES_encrypt,(.-AES_encrypt)

___

# Decryption side.  AES_Td is emitted exactly like AES_Te: a 256-byte
# aligned label followed by the words below, each written twice by
# _data_word.
$code.=<<___;
.align	256
AES_Td:
___
&_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88,
0xe7195b38, 0x79c8eedb, 658 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 659 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 660 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 661 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 662 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 663 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 664 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 665 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 666 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 667 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 668 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 669 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 670 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 671 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 672 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 673 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 674 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 675 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 676 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 677 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 678 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 679 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 680 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 681 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 682 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 683 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 684 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 685 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 686 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 687 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 688 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 689 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 690 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 691 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 692 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 693 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 694 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 695 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# As with AES_Te, the doubled words above fill 2048 bytes; the byte
# table emitted next lands at AES_Td+2048, the address the decrypt code
# touches in its "prefetch td4" loads and uses as the base for the
# final-round ldub look-ups.
$code.=<<___;
	.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.type	AES_Td,#object
.size	AES_Td,(.-AES_Td)
733 734.align 64 735.skip 16 736_sparcv9_AES_decrypt: 737 save %sp,-$frame-$locals,%sp 738 stx %i7,[%sp+$bias+$frame+0] ! off-load return address 739 ld [$key+240],$rounds 740 ld [$key+0],$t0 741 ld [$key+4],$t1 ! 742 ld [$key+8],$t2 743 ld [$key+12],$t3 744 srl $rounds,1,$rounds 745 xor $t0,$s0,$s0 746 ld [$key+16],$t0 747 xor $t1,$s1,$s1 748 ld [$key+20],$t1 749 srl $s0,21,$acc0 ! 750 xor $t2,$s2,$s2 751 ld [$key+24],$t2 752 xor $t3,$s3,$s3 753 and $acc0,2040,$acc0 754 ld [$key+28],$t3 755 srl $s3,13,$acc1 756 nop 757.Ldec_loop: 758 srl $s2,5,$acc2 ! 759 and $acc1,2040,$acc1 760 ldx [$tbl+$acc0],$acc0 761 sll $s1,3,$acc3 762 and $acc2,2040,$acc2 763 ldx [$tbl+$acc1],$acc1 764 srl $s1,21,$acc4 765 and $acc3,2040,$acc3 766 ldx [$tbl+$acc2],$acc2 ! 767 srl $s0,13,$acc5 768 and $acc4,2040,$acc4 769 ldx [$tbl+$acc3],$acc3 770 srl $s3,5,$acc6 771 and $acc5,2040,$acc5 772 ldx [$tbl+$acc4],$acc4 773 fmovs %f0,%f0 774 sll $s2,3,$acc7 ! 775 and $acc6,2040,$acc6 776 ldx [$tbl+$acc5],$acc5 777 srl $s2,21,$acc8 778 and $acc7,2040,$acc7 779 ldx [$tbl+$acc6],$acc6 780 srl $s1,13,$acc9 781 and $acc8,2040,$acc8 782 ldx [$tbl+$acc7],$acc7 ! 783 srl $s0,5,$acc10 784 and $acc9,2040,$acc9 785 ldx [$tbl+$acc8],$acc8 786 sll $s3,3,$acc11 787 and $acc10,2040,$acc10 788 ldx [$tbl+$acc9],$acc9 789 fmovs %f0,%f0 790 srl $s3,21,$acc12 ! 791 and $acc11,2040,$acc11 792 ldx [$tbl+$acc10],$acc10 793 srl $s2,13,$acc13 794 and $acc12,2040,$acc12 795 ldx [$tbl+$acc11],$acc11 796 srl $s1,5,$acc14 797 and $acc13,2040,$acc13 798 ldx [$tbl+$acc12],$acc12 ! 799 sll $s0,3,$acc15 800 and $acc14,2040,$acc14 801 ldx [$tbl+$acc13],$acc13 802 and $acc15,2040,$acc15 803 add $key,32,$key 804 ldx [$tbl+$acc14],$acc14 805 fmovs %f0,%f0 806 subcc $rounds,1,$rounds ! 807 ldx [$tbl+$acc15],$acc15 808 bz,a,pn %icc,.Ldec_last 809 add $tbl,2048,$rounds 810 811 srlx $acc1,8,$acc1 812 xor $acc0,$t0,$t0 813 ld [$key+0],$s0 814 fmovs %f0,%f0 815 srlx $acc2,16,$acc2 ! 
816 xor $acc1,$t0,$t0 817 ld [$key+4],$s1 818 srlx $acc3,24,$acc3 819 xor $acc2,$t0,$t0 820 ld [$key+8],$s2 821 srlx $acc5,8,$acc5 822 xor $acc3,$t0,$t0 823 ld [$key+12],$s3 ! 824 srlx $acc6,16,$acc6 825 xor $acc4,$t1,$t1 826 fmovs %f0,%f0 827 srlx $acc7,24,$acc7 828 xor $acc5,$t1,$t1 829 srlx $acc9,8,$acc9 830 xor $acc6,$t1,$t1 831 srlx $acc10,16,$acc10 ! 832 xor $acc7,$t1,$t1 833 srlx $acc11,24,$acc11 834 xor $acc8,$t2,$t2 835 srlx $acc13,8,$acc13 836 xor $acc9,$t2,$t2 837 srlx $acc14,16,$acc14 838 xor $acc10,$t2,$t2 839 srlx $acc15,24,$acc15 ! 840 xor $acc11,$t2,$t2 841 xor $acc12,$acc14,$acc14 842 xor $acc13,$t3,$t3 843 srl $t0,21,$acc0 844 xor $acc14,$t3,$t3 845 xor $acc15,$t3,$t3 846 srl $t3,13,$acc1 847 848 and $acc0,2040,$acc0 ! 849 srl $t2,5,$acc2 850 and $acc1,2040,$acc1 851 ldx [$tbl+$acc0],$acc0 852 sll $t1,3,$acc3 853 and $acc2,2040,$acc2 854 ldx [$tbl+$acc1],$acc1 855 fmovs %f0,%f0 856 srl $t1,21,$acc4 ! 857 and $acc3,2040,$acc3 858 ldx [$tbl+$acc2],$acc2 859 srl $t0,13,$acc5 860 and $acc4,2040,$acc4 861 ldx [$tbl+$acc3],$acc3 862 srl $t3,5,$acc6 863 and $acc5,2040,$acc5 864 ldx [$tbl+$acc4],$acc4 ! 865 sll $t2,3,$acc7 866 and $acc6,2040,$acc6 867 ldx [$tbl+$acc5],$acc5 868 srl $t2,21,$acc8 869 and $acc7,2040,$acc7 870 ldx [$tbl+$acc6],$acc6 871 fmovs %f0,%f0 872 srl $t1,13,$acc9 ! 873 and $acc8,2040,$acc8 874 ldx [$tbl+$acc7],$acc7 875 srl $t0,5,$acc10 876 and $acc9,2040,$acc9 877 ldx [$tbl+$acc8],$acc8 878 sll $t3,3,$acc11 879 and $acc10,2040,$acc10 880 ldx [$tbl+$acc9],$acc9 ! 881 srl $t3,21,$acc12 882 and $acc11,2040,$acc11 883 ldx [$tbl+$acc10],$acc10 884 srl $t2,13,$acc13 885 and $acc12,2040,$acc12 886 ldx [$tbl+$acc11],$acc11 887 fmovs %f0,%f0 888 srl $t1,5,$acc14 ! 889 and $acc13,2040,$acc13 890 ldx [$tbl+$acc12],$acc12 891 sll $t0,3,$acc15 892 and $acc14,2040,$acc14 893 ldx [$tbl+$acc13],$acc13 894 srlx $acc1,8,$acc1 895 and $acc15,2040,$acc15 896 ldx [$tbl+$acc14],$acc14 ! 
897 898 srlx $acc2,16,$acc2 899 xor $acc0,$s0,$s0 900 ldx [$tbl+$acc15],$acc15 901 srlx $acc3,24,$acc3 902 xor $acc1,$s0,$s0 903 ld [$key+16],$t0 904 fmovs %f0,%f0 905 srlx $acc5,8,$acc5 ! 906 xor $acc2,$s0,$s0 907 ld [$key+20],$t1 908 srlx $acc6,16,$acc6 909 xor $acc3,$s0,$s0 910 ld [$key+24],$t2 911 srlx $acc7,24,$acc7 912 xor $acc4,$s1,$s1 913 ld [$key+28],$t3 ! 914 srlx $acc9,8,$acc9 915 xor $acc5,$s1,$s1 916 ldx [$tbl+2048+0],%g0 ! prefetch td4 917 srlx $acc10,16,$acc10 918 xor $acc6,$s1,$s1 919 ldx [$tbl+2048+32],%g0 ! prefetch td4 920 srlx $acc11,24,$acc11 921 xor $acc7,$s1,$s1 922 ldx [$tbl+2048+64],%g0 ! prefetch td4 923 srlx $acc13,8,$acc13 924 xor $acc8,$s2,$s2 925 ldx [$tbl+2048+96],%g0 ! prefetch td4 926 srlx $acc14,16,$acc14 ! 927 xor $acc9,$s2,$s2 928 ldx [$tbl+2048+128],%g0 ! prefetch td4 929 srlx $acc15,24,$acc15 930 xor $acc10,$s2,$s2 931 ldx [$tbl+2048+160],%g0 ! prefetch td4 932 srl $s0,21,$acc0 933 xor $acc11,$s2,$s2 934 ldx [$tbl+2048+192],%g0 ! prefetch td4 935 xor $acc12,$acc14,$acc14 936 xor $acc13,$s3,$s3 937 ldx [$tbl+2048+224],%g0 ! prefetch td4 938 and $acc0,2040,$acc0 ! 939 xor $acc14,$s3,$s3 940 xor $acc15,$s3,$s3 941 ba .Ldec_loop 942 srl $s3,13,$acc1 943 944.align 32 945.Ldec_last: 946 srlx $acc1,8,$acc1 ! 947 xor $acc0,$t0,$t0 948 ld [$key+0],$s0 949 srlx $acc2,16,$acc2 950 xor $acc1,$t0,$t0 951 ld [$key+4],$s1 952 srlx $acc3,24,$acc3 953 xor $acc2,$t0,$t0 954 ld [$key+8],$s2 ! 955 srlx $acc5,8,$acc5 956 xor $acc3,$t0,$t0 957 ld [$key+12],$s3 958 srlx $acc6,16,$acc6 959 xor $acc4,$t1,$t1 960 srlx $acc7,24,$acc7 961 xor $acc5,$t1,$t1 962 srlx $acc9,8,$acc9 ! 963 xor $acc6,$t1,$t1 964 srlx $acc10,16,$acc10 965 xor $acc7,$t1,$t1 966 srlx $acc11,24,$acc11 967 xor $acc8,$t2,$t2 968 srlx $acc13,8,$acc13 969 xor $acc9,$t2,$t2 970 srlx $acc14,16,$acc14 ! 
971 xor $acc10,$t2,$t2 972 srlx $acc15,24,$acc15 973 xor $acc11,$t2,$t2 974 xor $acc12,$acc14,$acc14 975 xor $acc13,$t3,$t3 976 srl $t0,24,$acc0 977 xor $acc14,$t3,$t3 978 xor $acc15,$t3,$t3 ! 979 srl $t3,16,$acc1 980 981 srl $t2,8,$acc2 982 and $acc1,255,$acc1 983 ldub [$rounds+$acc0],$acc0 984 srl $t1,24,$acc4 985 and $acc2,255,$acc2 986 ldub [$rounds+$acc1],$acc1 987 srl $t0,16,$acc5 ! 988 and $t1,255,$acc3 989 ldub [$rounds+$acc2],$acc2 990 ldub [$rounds+$acc3],$acc3 991 srl $t3,8,$acc6 992 and $acc5,255,$acc5 993 ldub [$rounds+$acc4],$acc4 994 fmovs %f0,%f0 995 srl $t2,24,$acc8 ! 996 and $acc6,255,$acc6 997 ldub [$rounds+$acc5],$acc5 998 srl $t1,16,$acc9 999 and $t2,255,$acc7 1000 ldub [$rounds+$acc6],$acc6 1001 ldub [$rounds+$acc7],$acc7 1002 fmovs %f0,%f0 1003 srl $t0,8,$acc10 ! 1004 and $acc9,255,$acc9 1005 ldub [$rounds+$acc8],$acc8 1006 srl $t3,24,$acc12 1007 and $acc10,255,$acc10 1008 ldub [$rounds+$acc9],$acc9 1009 srl $t2,16,$acc13 1010 and $t3,255,$acc11 1011 ldub [$rounds+$acc10],$acc10 ! 1012 srl $t1,8,$acc14 1013 and $acc13,255,$acc13 1014 ldub [$rounds+$acc11],$acc11 1015 ldub [$rounds+$acc12],$acc12 1016 and $acc14,255,$acc14 1017 ldub [$rounds+$acc13],$acc13 1018 and $t0,255,$acc15 1019 ldub [$rounds+$acc14],$acc14 ! 1020 1021 sll $acc0,24,$acc0 1022 xor $acc3,$s0,$s0 1023 ldub [$rounds+$acc15],$acc15 1024 sll $acc1,16,$acc1 1025 xor $acc0,$s0,$s0 1026 ldx [%sp+$bias+$frame+0],%i7 ! restore return address 1027 fmovs %f0,%f0 1028 sll $acc2,8,$acc2 ! 1029 xor $acc1,$s0,$s0 1030 sll $acc4,24,$acc4 1031 xor $acc2,$s0,$s0 1032 sll $acc5,16,$acc5 1033 xor $acc7,$s1,$s1 1034 sll $acc6,8,$acc6 1035 xor $acc4,$s1,$s1 1036 sll $acc8,24,$acc8 ! 1037 xor $acc5,$s1,$s1 1038 sll $acc9,16,$acc9 1039 xor $acc11,$s2,$s2 1040 sll $acc10,8,$acc10 1041 xor $acc6,$s1,$s1 1042 sll $acc12,24,$acc12 1043 xor $acc8,$s2,$s2 1044 sll $acc13,16,$acc13 ! 
1045 xor $acc9,$s2,$s2 1046 sll $acc14,8,$acc14 1047 xor $acc10,$s2,$s2 1048 xor $acc12,$acc14,$acc14 1049 xor $acc13,$s3,$s3 1050 xor $acc14,$s3,$s3 1051 xor $acc15,$s3,$s3 1052 1053 ret 1054 restore 1055.type _sparcv9_AES_decrypt,#function 1056.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt) 1057 1058.align 32 1059.globl AES_decrypt 1060AES_decrypt: 1061 or %o0,%o1,%g1 1062 andcc %g1,3,%g0 1063 bnz,pn %xcc,.Lunaligned_dec 1064 save %sp,-$frame,%sp 1065 1066 ld [%i0+0],%o0 1067 ld [%i0+4],%o1 1068 ld [%i0+8],%o2 1069 ld [%i0+12],%o3 1070 10711: call .+8 1072 add %o7,AES_Td-1b,%o4 1073 call _sparcv9_AES_decrypt 1074 mov %i2,%o5 1075 1076 st %o0,[%i1+0] 1077 st %o1,[%i1+4] 1078 st %o2,[%i1+8] 1079 st %o3,[%i1+12] 1080 1081 ret 1082 restore 1083 1084.align 32 1085.Lunaligned_dec: 1086 ldub [%i0+0],%l0 1087 ldub [%i0+1],%l1 1088 ldub [%i0+2],%l2 1089 1090 sll %l0,24,%l0 1091 ldub [%i0+3],%l3 1092 sll %l1,16,%l1 1093 ldub [%i0+4],%l4 1094 sll %l2,8,%l2 1095 or %l1,%l0,%l0 1096 ldub [%i0+5],%l5 1097 sll %l4,24,%l4 1098 or %l3,%l2,%l2 1099 ldub [%i0+6],%l6 1100 sll %l5,16,%l5 1101 or %l0,%l2,%o0 1102 ldub [%i0+7],%l7 1103 1104 sll %l6,8,%l6 1105 or %l5,%l4,%l4 1106 ldub [%i0+8],%l0 1107 or %l7,%l6,%l6 1108 ldub [%i0+9],%l1 1109 or %l4,%l6,%o1 1110 ldub [%i0+10],%l2 1111 1112 sll %l0,24,%l0 1113 ldub [%i0+11],%l3 1114 sll %l1,16,%l1 1115 ldub [%i0+12],%l4 1116 sll %l2,8,%l2 1117 or %l1,%l0,%l0 1118 ldub [%i0+13],%l5 1119 sll %l4,24,%l4 1120 or %l3,%l2,%l2 1121 ldub [%i0+14],%l6 1122 sll %l5,16,%l5 1123 or %l0,%l2,%o2 1124 ldub [%i0+15],%l7 1125 1126 sll %l6,8,%l6 1127 or %l5,%l4,%l4 1128 or %l7,%l6,%l6 1129 or %l4,%l6,%o3 1130 11311: call .+8 1132 add %o7,AES_Td-1b,%o4 1133 call _sparcv9_AES_decrypt 1134 mov %i2,%o5 1135 1136 srl %o0,24,%l0 1137 srl %o0,16,%l1 1138 stb %l0,[%i1+0] 1139 srl %o0,8,%l2 1140 stb %l1,[%i1+1] 1141 stb %l2,[%i1+2] 1142 srl %o1,24,%l4 1143 stb %o0,[%i1+3] 1144 1145 srl %o1,16,%l5 1146 stb %l4,[%i1+4] 1147 srl %o1,8,%l6 1148 stb %l5,[%i1+5] 
stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]

	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]

	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_decrypt,#function
.size	AES_decrypt,(.-AES_decrypt)
___

# fmovs instructions substituting for FP nops were originally added
# to meet specific instruction alignment requirements to maximize ILP.
# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
# undesired effect, so just omit them and sacrifice some portion of
# percent in performance...
# The substitution blanks every generated line from "fmovs" to its end.
$code =~ s/fmovs.*$//gm;

print $code;
# Closing STDOUT forces the flush.  Buffered write errors only surface
# here, so a failed close means the emitted assembly is incomplete and
# must not be reported as success.
close STDOUT or die "error closing STDOUT: $!";