#if defined(__arm__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
@ ---------------------------------------------------------------------
@ AES primitives for 32-bit ARM built on the ARMv8 AES instructions
@ (aese/aesd/aesmc/aesimc) plus NEON.  GNU as syntax, ARM (A32) state.
@
@ The AES instructions are emitted as raw .byte sequences; the intended
@ mnemonic is given in the comment on each such line.  Presumably this
@ lets assemblers that lack the mnemonics build the file.
@
@ Key schedule layout used by every routine below:
@   bytes   0..239  round keys, 16 bytes each
@   byte    240     32-bit number of rounds (10, 12 or 14)
@
@ Calling convention: r0-r3 carry the first four arguments, further
@ arguments are read from the stack at entry sp.  d8-d15 and r4-r10 are
@ saved and restored wherever they are used.
@ ---------------------------------------------------------------------
.text
.arch	armv7-a	@ do not confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__

@ Constants for the key expansion.  Kept in .text so that adr can reach
@ them.  Row 0: initial round constant.  Row 1: vtbl byte indices that
@ rotate the last key word and splat it over all four lanes.  Row 2:
@ round constant 0x1b, used once the doubling of row 0 has run out.
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@ ---------------------------------------------------------------------
@ aes_hw_set_encrypt_key
@ In:    r0 = user key bytes
@        r1 = key length in bits (128, 192 or 256)
@        r2 = key schedule to fill in
@ Out:   r0 = 0 on success
@             -1 if r0 or r2 is NULL
@             -2 if r1 is not one of 128, 192, 256
@        On success r2 = schedule + 240 and r12 = number of rounds;
@        aes_hw_set_decrypt_key relies on both (it enters at .Lenc_key).
@ Clobb: r1, r3, r12, q0-q3, q8-q10, flags
@ ---------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	@ Argument validation.  r3 carries the prospective return value.
	mov	r3,#-1
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags select the variant below

	veor	q0,q0,q0		@ q0 = 0: zero round key for aese,
					@ zero fill for vext
	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

	@ --- 128-bit key: 8 looped rounds (rcon 0x01..0x80), then two
	@     unrolled rounds with rcon 0x1b and 0x36.
.align	4
.Loop128:
	vtbl.8	d20,{q3},d4		@ q10 = rotated last word, splatted
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12		@ q9 = q3 shifted up by one word
	vst1.32	{q3},[r2]!		@ emit current round key
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9		@ running xor of the four key words
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ fold in the round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ next round constant
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b (third row of .Lrcon)

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ 0x1b -> 0x36
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, at offset 160
	add	r2,r2,#0x50		@ r2 = schedule + 240

	mov	r12,#10
	b	.Ldone

	@ --- 192-bit key: 8 iterations, each emitting 24 bytes.
.align	4
.L192:
	vld1.8	{d16},[r0]!		@ d16 = key bytes 16..23
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule + 240
	b	.Ldone

	@ --- 256-bit key: 7 iterations, each emitting 32 bytes; the loop
	@     is left from its middle after the final 16-byte store.
.align	4
.L256:
	vld1.8	{q8},[r0]		@ q8 = key bytes 16..31
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ flags still from the subs above

	vdup.32	q10,d7[1]		@ second half: no rotate, no rcon
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ rounds, at schedule + 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@ ---------------------------------------------------------------------
@ aes_hw_set_decrypt_key
@ In:    same as aes_hw_set_encrypt_key (r0 = key, r1 = bits, r2 = out)
@ Out:   r0 = 0 on success, otherwise the error code produced by the
@        encryption key setup (schedule is then left as that code left
@        it).
@ Builds the encryption schedule, then reverses the order of the round
@ keys in place and applies aesimc to every key except the first and
@ the last.
@ Clobb: r1-r3, r12, q0-q3, q8-q10, flags (r4 is saved and restored)
@ ---------------------------------------------------------------------
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ stride for the descending pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the outermost pair untouched.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Swap inner pairs, transforming both, until the pointers meet.
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ Middle key: transformed in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

@ ---------------------------------------------------------------------
@ aes_hw_encrypt: encrypt a single 16-byte block.
@ In:    r0 = input block, r1 = output block, r2 = key schedule
@ Clobb: r2, r3, q0-q2, flags
@ Two rounds per loop iteration; the last two rounds are peeled so the
@ final one can skip aesmc and end with a plain xor of the last key.
@ ---------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	r3,[r2,#240]		@ r3 = rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt

@ ---------------------------------------------------------------------
@ aes_hw_decrypt: decrypt a single 16-byte block.
@ In:    r0 = input block, r1 = output block, r2 = key schedule
@        (presumably one prepared by aes_hw_set_decrypt_key)
@ Clobb: r2, r3, q0-q2, flags
@ Same structure as aes_hw_encrypt with aesd/aesimc.
@ ---------------------------------------------------------------------
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	r3,[r2,#240]		@ r3 = rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt

@ ---------------------------------------------------------------------
@ aes_hw_cbc_encrypt: CBC encryption or decryption of whole blocks.
@ In:    r0 = input, r1 = output, r2 = length in bytes,
@        r3 = key schedule,
@        [sp]    = pointer to the 16-byte IV (read, then updated)
@        [sp,#4] = nonzero to encrypt, zero to decrypt
@ The length is rounded down to a multiple of 16.  If it is below 16
@ the routine returns without touching the output or the IV.
@ On exit the IV buffer holds the last ciphertext block processed.
@
@ Register roles after setup:
@   q8,q9    round keys 0 and 1 (reloaded as the rounds advance)
@   q10-q15  the six round keys preceding the last one
@   q7       last round key
@   q6       current IV / previous ciphertext block
@   r5       rounds - 8 (2, 4 or 6), r8 = input stride (16, or 0 when
@            the next load must not advance past the final block)
@ ---------------------------------------------------------------------
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not advance r0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from cmp r5,#0 above

	@ ---------------- encryption ----------------
	@ q5 = rndkey[0] ^ rndkey[last].  Each output block is q0 ^ q7,
	@ and the next input is pre-xored with q5, so the aese at the loop
	@ top applies the chaining xor and round key 0 in one step.
	cmp	r5,#2
	veor	q0,q0,q6		@ first block ^ IV
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	@ 192/256-bit keys: r7, r6, r12, r14, r3 point at round keys
	@ 1, 4, 5, 6 and 7 respectively.
	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store previous ciphertext block
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

	@ two extra rounds, 256-bit keys only
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ next block is the last one
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ look-ahead load of next input
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc		@ flags from subs r2 above

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

	@ 128-bit keys: all round keys stay resident in registers
	@ (q8, q9, q2, q3, q10-q15, q7).
.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store previous ciphertext block
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ next block is the last one
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ look-ahead load of next input
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

	@ ---------------- decryption ----------------
	@ Three blocks per iteration in q0, q1, q10.  q2, q3, q11 keep the
	@ matching ciphertext blocks for chaining; q6 is the block that
	@ precedes the group.  One or two blocks go through the tail.
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value ^ last round key
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point; it goes
					@ negative only when fewer than
					@ three blocks remain
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11		@ chaining value for the next group
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec		@ flags from subs r2,#0x30 above

	cmn	r2,#0x30		@ nothing left over?
	beq	.Lcbc_done
	nop

	@ One or two remaining blocks, carried in q1 and q10.
.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ eq: exactly one block remains
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back as IV
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

@ ---------------------------------------------------------------------
@ aes_hw_ctr32_encrypt_blocks: CTR mode with a 32-bit counter.
@ In:    r0 = input, r1 = output, r2 = number of 16-byte blocks,
@        r3 = key schedule,
@        [sp] = pointer to the 16-byte counter block
@ The counter is the last 32-bit word of the counter block, read as a
@ big-endian value on little-endian builds and incremented once per
@ block in a 32-bit register.  The counter block in memory is only
@ read, never written back.
@
@ Register roles after setup:
@   q8,q9    round keys 0 and 1 (reloaded as the rounds advance)
@   q12-q15  the four round keys preceding the last one
@   q7       last round key (xored into the input early)
@   q6       counter block template; q0, q1, q10 = three live counters
@   r8       counter value in host order
@
@ NOTE(review): with r2 == 0 control still reaches .Lctr32_tail, which
@ loads from r0 and stores two blocks to r1.  Callers presumably never
@ pass zero; confirm.
@ NOTE(review): only the first rev is guarded by __ARMEB__; the later
@ rev instructions are unconditional, so big-endian builds look
@ unhandled here.  Confirm before enabling such a target.
@ ---------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]		@ counter word
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ fewer than 2 blocks: tail must not
					@ advance r0 between its two loads
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10		@ q1 = counter + 1
	bls	.Lctr32_tail		@ one or two blocks only
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12		@ q10 = counter + 2
	b	.Loop3x_ctr32

	@ Three counter blocks per iteration.
.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ Remaining rounds.  The states move to q4, q5, q9 so that q0, q1
	@ and q10 can be refilled with the next three counter blocks while
	@ the current ones finish.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ input ^ last round key
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32		@ flags from subs r2,#3 above

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block: second load re-reads it

	@ One or two remaining blocks, in q0 and q1.
.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ single block: skip second store
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif