#if defined(__arm__)
#include "arm_arch.h"

@ Table-driven (4-bit) and NEON implementations of the gcm_* entry points.
@ Assembled as ARM (not Thumb) code using unified syntax.  The 4-bit
@ routines locate rem_4bit with pc-relative arithmetic, so the byte
@ distances between rem_4bit, rem_4bit_get and gcm_ghash_4bit must not
@ change: do not insert code, data or alignment between them.

.syntax	unified

.text
.code	32

@ rem_4bit: 16 halfword constants (32 bytes, 32-byte aligned).
@ Indexed by the 4 bits shifted out of the lowest word of the accumulator;
@ the selected value is XORed into the top 16 bits of the highest word
@ (see the ldrh / eor ...,lsl#16 pairs below).
.type	rem_4bit,%object
.align	5
rem_4bit:
.short	0x0000,0x1C20,0x3840,0x2460
.short	0x7080,0x6CA0,0x48C0,0x54E0
.short	0xE100,0xFD20,0xD940,0xC560
.short	0x9180,0x8DA0,0xA9C0,0xB5E0
.size	rem_4bit,.-rem_4bit

@ rem_4bit_get: helper reached by a plain branch from gcm_gmult_4bit.
@ Out:  r2 = address of rem_4bit.
@ It does not return; it branches back to .Lrem_4bit_got.
@ In ARM state pc reads as the current instruction address + 8, so
@ pc - 8 is the address of rem_4bit_get, and the 32-byte table sits
@ immediately before it.  The helper is exactly four instructions
@ (16 bytes); gcm_ghash_4bit depends on that size (48 = 32 + 16).
.type	rem_4bit_get,%function
rem_4bit_get:
	sub	r2,pc,#8		@ r2 = &rem_4bit_get
	sub	r2,r2,#32		@ &rem_4bit
	b	.Lrem_4bit_got
	nop				@ keeps the helper at 16 bytes
.size	rem_4bit_get,.-rem_4bit_get

@ gcm_ghash_4bit
@ In:   r0 = Xi   (16 bytes, read and updated in place)
@       r1 = Htbl (16-byte entries, indexed by a 4-bit nibble)
@       r2 = inp  (input bytes)
@       r3 = len  (byte count)
@ The outer loop handles one 16-byte block before testing for the end and
@ exits only when r2 equals inp+len exactly, so len is expected to be a
@ non-zero multiple of 16.
@ Stack while running:
@       [sp,#0..#31]  copy of rem_4bit
@       [sp,#32]      saved r3 (inp+len)
@       [sp,#36...]   saved r4-r11, lr
@ Register use: r4-r7 = 128-bit accumulator, r8-r11 = current Htbl entry,
@ r12 = low nibble (later rem index), r14 = high nibble * 16 (later rem
@ index), r3 = byte index counting down from 14 inside a block.
.global	gcm_ghash_4bit
.hidden	gcm_ghash_4bit
.type	gcm_ghash_4bit,%function
gcm_ghash_4bit:
	sub	r12,pc,#8		@ r12 = &gcm_ghash_4bit
	add	r3,r2,r3		@ r3 to point at the end
	stmdb	sp!,{r3-r11,lr}		@ save r3/end too
	sub	r12,r12,#48		@ &rem_4bit

	ldmia	r12,{r4-r11}		@ copy rem_4bit ...
	stmdb	sp!,{r4-r11}		@ ... to stack

	ldrb	r12,[r2,#15]
	ldrb	r14,[r0,#15]
.Louter:
	eor	r12,r12,r14		@ inp[15] ^ Xi[15]
	and	r14,r12,#0xf0		@ high nibble, already scaled by 16
	and	r12,r12,#0x0f		@ low nibble
	mov	r3,#14			@ next byte index

	add	r7,r1,r12,lsl#4
	ldmia	r7,{r4-r7}		@ load Htbl[nlo]
	add	r11,r1,r14
	ldrb	r12,[r2,#14]

	and	r14,r4,#0xf		@ rem
	ldmia	r11,{r8-r11}		@ load Htbl[nhi]
	add	r14,r14,r14		@ halfword index -> byte offset
	eor	r4,r8,r4,lsr#4
	ldrh	r8,[sp,r14]		@ rem_4bit[rem]
	eor	r4,r4,r5,lsl#28
	ldrb	r14,[r0,#14]
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4
	eor	r12,r12,r14		@ inp[14] ^ Xi[14]
	and	r14,r12,#0xf0
	and	r12,r12,#0x0f
	eor	r7,r7,r8,lsl#16

	@ Two nibbles per iteration.  subs sets N once r3 drops below zero;
	@ nothing up to the bpl changes the flags, so the pl-suffixed
	@ instructions (prefetch of the next byte pair) are skipped on the
	@ final pass.
.Linner:
	add	r11,r1,r12,lsl#4
	and	r12,r4,#0xf		@ rem
	subs	r3,r3,#1
	add	r12,r12,r12
	ldmia	r11,{r8-r11}		@ load Htbl[nlo]
	eor	r4,r8,r4,lsr#4
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	ldrh	r8,[sp,r12]		@ rem_4bit[rem]
	eor	r6,r10,r6,lsr#4
	ldrbpl	r12,[r2,r3]
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4

	add	r11,r1,r14
	and	r14,r4,#0xf		@ rem
	eor	r7,r7,r8,lsl#16		@ ^= rem_4bit[rem]
	add	r14,r14,r14
	ldmia	r11,{r8-r11}		@ load Htbl[nhi]
	eor	r4,r8,r4,lsr#4
	ldrbpl	r8,[r0,r3]
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	ldrh	r9,[sp,r14]		@ rem_4bit[rem]
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eorpl	r12,r12,r8
	eor	r7,r11,r7,lsr#4
	andpl	r14,r12,#0xf0
	andpl	r12,r12,#0x0f
	eor	r7,r7,r9,lsl#16		@ ^= rem_4bit[rem]
	bpl	.Linner

	ldr	r3,[sp,#32]		@ re-load r3/end
	add	r2,r2,#16		@ advance inp to the next block
	mov	r14,r4			@ low byte = new Xi[15], used at .Louter
	@ Store r4..r7 to Xi[12..15], Xi[8..11], Xi[4..7], Xi[0..3] with the
	@ most significant byte at the lowest address: rev+str on ARMv7+
	@ little-endian, plain str on big-endian, byte stores otherwise.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r4,r4
	str	r4,[r0,#12]
#elif defined(__ARMEB__)
	str	r4,[r0,#12]
#else
	mov	r9,r4,lsr#8
	strb	r4,[r0,#12+3]
	mov	r10,r4,lsr#16
	strb	r9,[r0,#12+2]
	mov	r11,r4,lsr#24
	strb	r10,[r0,#12+1]
	strb	r11,[r0,#12]
#endif
	cmp	r2,r3			@ flags stay live until bne .Louter
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r5,r5
	str	r5,[r0,#8]
#elif defined(__ARMEB__)
	str	r5,[r0,#8]
#else
	mov	r9,r5,lsr#8
	strb	r5,[r0,#8+3]
	mov	r10,r5,lsr#16
	strb	r9,[r0,#8+2]
	mov	r11,r5,lsr#24
	strb	r10,[r0,#8+1]
	strb	r11,[r0,#8]
#endif
	ldrbne	r12,[r2,#15]		@ only when another block follows
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r6,r6
	str	r6,[r0,#4]
#elif defined(__ARMEB__)
	str	r6,[r0,#4]
#else
	mov	r9,r6,lsr#8
	strb	r6,[r0,#4+3]
	mov	r10,r6,lsr#16
	strb	r9,[r0,#4+2]
	mov	r11,r6,lsr#24
	strb	r10,[r0,#4+1]
	strb	r11,[r0,#4]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r7,r7
	str	r7,[r0,#0]
#elif defined(__ARMEB__)
	str	r7,[r0,#0]
#else
	mov	r9,r7,lsr#8
	strb	r7,[r0,#0+3]
	mov	r10,r7,lsr#16
	strb	r9,[r0,#0+2]
	mov	r11,r7,lsr#24
	strb	r10,[r0,#0+1]
	strb	r11,[r0,#0]
#endif

	bne	.Louter

	add	sp,sp,#36		@ drop rem_4bit copy (32) + saved r3 (4)
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
#endif
.size	gcm_ghash_4bit,.-gcm_ghash_4bit

@ gcm_gmult_4bit
@ In:   r0 = Xi   (16 bytes, read and updated in place)
@       r1 = Htbl (16-byte entries, indexed by a 4-bit nibble)
@ Same nibble walk as one block of gcm_ghash_4bit, but the bytes come from
@ Xi alone and rem_4bit is read in place through r2 (set by the
@ rem_4bit_get detour) instead of from a stack copy.
@ Saves and restores r4-r11; returns through the saved lr.
.global	gcm_gmult_4bit
.hidden	gcm_gmult_4bit
.type	gcm_gmult_4bit,%function
gcm_gmult_4bit:
	stmdb	sp!,{r4-r11,lr}
	ldrb	r12,[r0,#15]
	b	rem_4bit_get		@ sets r2 = &rem_4bit, branches back below
.Lrem_4bit_got:
	and	r14,r12,#0xf0		@ high nibble, already scaled by 16
	and	r12,r12,#0x0f		@ low nibble
	mov	r3,#14			@ next byte index

	add	r7,r1,r12,lsl#4
	ldmia	r7,{r4-r7}		@ load Htbl[nlo]
	ldrb	r12,[r0,#14]

	add	r11,r1,r14
	and	r14,r4,#0xf		@ rem
	ldmia	r11,{r8-r11}		@ load Htbl[nhi]
	add	r14,r14,r14		@ halfword index -> byte offset
	eor	r4,r8,r4,lsr#4
	ldrh	r8,[r2,r14]		@ rem_4bit[rem]
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4
	and	r14,r12,#0xf0
	eor	r7,r7,r8,lsl#16
	and	r12,r12,#0x0f

	@ Two nibbles per iteration; the pl-suffixed instructions are
	@ skipped once subs has taken r3 below zero.
.Loop:
	add	r11,r1,r12,lsl#4
	and	r12,r4,#0xf		@ rem
	subs	r3,r3,#1
	add	r12,r12,r12
	ldmia	r11,{r8-r11}		@ load Htbl[nlo]
	eor	r4,r8,r4,lsr#4
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	ldrh	r8,[r2,r12]		@ rem_4bit[rem]
	eor	r6,r10,r6,lsr#4
	ldrbpl	r12,[r0,r3]
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4

	add	r11,r1,r14
	and	r14,r4,#0xf		@ rem
	eor	r7,r7,r8,lsl#16		@ ^= rem_4bit[rem]
	add	r14,r14,r14
	ldmia	r11,{r8-r11}		@ load Htbl[nhi]
	eor	r4,r8,r4,lsr#4
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	ldrh	r8,[r2,r14]		@ rem_4bit[rem]
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4
	andpl	r14,r12,#0xf0
	andpl	r12,r12,#0x0f
	eor	r7,r7,r8,lsl#16		@ ^= rem_4bit[rem]
	bpl	.Loop
	@ Write the accumulator back to Xi, most significant byte first.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r4,r4
	str	r4,[r0,#12]
#elif defined(__ARMEB__)
	str	r4,[r0,#12]
#else
	mov	r9,r4,lsr#8
	strb	r4,[r0,#12+3]
	mov	r10,r4,lsr#16
	strb	r9,[r0,#12+2]
	mov	r11,r4,lsr#24
	strb	r10,[r0,#12+1]
	strb	r11,[r0,#12]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r5,r5
	str	r5,[r0,#8]
#elif defined(__ARMEB__)
	str	r5,[r0,#8]
#else
	mov	r9,r5,lsr#8
	strb	r5,[r0,#8+3]
	mov	r10,r5,lsr#16
	strb	r9,[r0,#8+2]
	mov	r11,r5,lsr#24
	strb	r10,[r0,#8+1]
	strb	r11,[r0,#8]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r6,r6
	str	r6,[r0,#4]
#elif defined(__ARMEB__)
	str	r6,[r0,#4]
#else
	mov	r9,r6,lsr#8
	strb	r6,[r0,#4+3]
	mov	r10,r6,lsr#16
	strb	r9,[r0,#4+2]
	mov	r11,r6,lsr#24
	strb	r10,[r0,#4+1]
	strb	r11,[r0,#4]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r7,r7
	str	r7,[r0,#0]
#elif defined(__ARMEB__)
	str	r7,[r0,#0]
#else
	mov	r9,r7,lsr#8
	strb	r7,[r0,#0+3]
	mov	r10,r7,lsr#16
	strb	r9,[r0,#0+2]
	mov	r11,r7,lsr#24
	strb	r10,[r0,#0+1]
	strb	r11,[r0,#0]
#endif

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
#endif
.size	gcm_gmult_4bit,.-gcm_gmult_4bit
#if __ARM_ARCH__>=7
.fpu	neon

@ gcm_init_neon
@ In:   r0 = destination for the 16-byte twisted H
@       r1 = source H, read as two 64-bit words (64-bit alignment hinted)
@ Shifts the 128-bit value left by one bit and, when the top bit of d7
@ was set, XORs in the constant built in q8 (0xc2....01).
@ Uses q3, q8, q9 and d26; r1 is advanced by 8 by the first load.
.global	gcm_init_neon
.hidden	gcm_init_neon
.type	gcm_init_neon,%function
.align	4
gcm_init_neon:
	vld1.64	d7,[r1,:64]!		@ load H
	vmov.i8	q8,#0xe1
	vld1.64	d6,[r1,:64]
	vshl.i64	d17,#57
	vshr.u64	d16,#63		@ t0=0xc2....01
	vdup.8	q9,d7[7]
	vshr.u64	d26,d6,#63		@ bit carried from d6 into d7
	vshr.s8	q9,#7			@ broadcast carry bit
	vshl.i64	q3,q3,#1
	vand	q8,q8,q9
	vorr	d7,d26			@ H<<<=1
	veor	q3,q3,q8		@ twisted H
	vstmia	r0,{q3}

	.word	0xe12fff1e		@ encoding of bx lr
.size	gcm_init_neon,.-gcm_init_neon

@ gcm_gmult_neon
@ In:   r0 = Xi (16 bytes; r0 is advanced by 16 by the two loads)
@       r1 = twisted H as written by gcm_init_neon
@ Loads Xi into q3 and the masks into d29-d31, sets r3 = 16 and joins the
@ shared multiply code at .Lgmult_neon inside gcm_ghash_neon.
.global	gcm_gmult_neon
.hidden	gcm_gmult_neon
.type	gcm_gmult_neon,%function
.align	4
gcm_gmult_neon:
	vld1.64	d7,[r0,:64]!		@ load Xi
	vld1.64	d6,[r0,:64]!
	vmov.i64	d29,#0x0000ffffffffffff
	vldmia	r1,{d26-d27}		@ load twisted H
	vmov.i64	d30,#0x00000000ffffffff
#ifdef __ARMEL__
	vrev64.8	q3,q3
#endif
	vmov.i64	d31,#0x000000000000ffff
	veor	d28,d26,d27		@ Karatsuba pre-processing
	mov	r3,#16
	b	.Lgmult_neon
.size	gcm_gmult_neon,.-gcm_gmult_neon

@ gcm_ghash_neon
@ In:   r0 = Xi (16 bytes; r0 is advanced by 16 by the two loads)
@       r1 = twisted H as written by gcm_init_neon
@       r2 = inp, consumed 16 bytes per pass of .Loop_neon
@       r3 = presumably the remaining byte count (gcm_gmult_neon enters
@            with r3 = 16) -- TODO confirm against the loop tail
@ Constants: d26/d27 = twisted H halves, d28 = d26^d27,
@            d29/d30/d31 = 48/32/16-bit masks.
@ Each 64x64 carry-less product is assembled from 8x8 vmull.p8 products
@ of byte-rotated operands; three such products are formed (q0, q1, q2).
.global	gcm_ghash_neon
.hidden	gcm_ghash_neon
.type	gcm_ghash_neon,%function
.align	4
gcm_ghash_neon:
	vld1.64	d1,[r0,:64]!		@ load Xi
	vld1.64	d0,[r0,:64]!
	vmov.i64	d29,#0x0000ffffffffffff
	vldmia	r1,{d26-d27}		@ load twisted H
	vmov.i64	d30,#0x00000000ffffffff
#ifdef __ARMEL__
	vrev64.8	q0,q0
#endif
	vmov.i64	d31,#0x000000000000ffff
	veor	d28,d26,d27		@ Karatsuba pre-processing

.Loop_neon:
	vld1.64	d7,[r2]!		@ load inp
	vld1.64	d6,[r2]!
#ifdef __ARMEL__
	vrev64.8	q3,q3
#endif
	veor	q3,q0			@ inp^=Xi
.Lgmult_neon:
	@ ---- q0 = d26 * d6 ----
	vext.8	d16, d26, d26, #1	@ A1
	vmull.p8	q8, d16, d6		@ F = A1*B
	vext.8	d0, d6, d6, #1	@ B1
	vmull.p8	q0, d26, d0		@ E = A*B1
	vext.8	d18, d26, d26, #2	@ A2
	vmull.p8	q9, d18, d6		@ H = A2*B
	vext.8	d22, d6, d6, #2	@ B2
	vmull.p8	q11, d26, d22		@ G = A*B2
	vext.8	d20, d26, d26, #3	@ A3
	veor	q8, q8, q0		@ L = E + F
	vmull.p8	q10, d20, d6		@ J = A3*B
	vext.8	d0, d6, d6, #3	@ B3
	veor	q9, q9, q11		@ M = G + H
	vmull.p8	q0, d26, d0		@ I = A*B3
	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
	vand	d17, d17, d29
	vext.8	d22, d6, d6, #4	@ B4
	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
	vand	d19, d19, d30
	vmull.p8	q11, d26, d22		@ K = A*B4
	veor	q10, q10, q0		@ N = I + J
	veor	d16, d16, d17
	veor	d18, d18, d19
	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
	vand	d21, d21, d31
	vext.8	q8, q8, q8, #15
	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
	vmov.i64	d23, #0
	vext.8	q9, q9, q9, #14
	veor	d20, d20, d21
	vmull.p8	q0, d26, d6		@ D = A*B
	vext.8	q11, q11, q11, #12
	vext.8	q10, q10, q10, #13
	veor	q8, q8, q9
	veor	q10, q10, q11
	veor	q0, q0, q8
	veor	q0, q0, q10
	@ ---- q1 = d28 * (d6 ^ d7) ----
	veor	d6,d6,d7	@ Karatsuba pre-processing
	vext.8	d16, d28, d28, #1	@ A1
	vmull.p8	q8, d16, d6		@ F = A1*B
	vext.8	d2, d6, d6, #1	@ B1
	vmull.p8	q1, d28, d2		@ E = A*B1
	vext.8	d18, d28, d28, #2	@ A2
	vmull.p8	q9, d18, d6		@ H = A2*B
	vext.8	d22, d6, d6, #2	@ B2
	vmull.p8	q11, d28, d22		@ G = A*B2
	vext.8	d20, d28, d28, #3	@ A3
	veor	q8, q8, q1		@ L = E + F
	vmull.p8	q10, d20, d6		@ J = A3*B
	vext.8	d2, d6, d6, #3	@ B3
	veor	q9, q9, q11		@ M = G + H
	vmull.p8	q1, d28, d2		@ I = A*B3
	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
	vand	d17, d17, d29
	vext.8	d22, d6, d6, #4	@ B4
	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
	vand	d19, d19, d30
	vmull.p8	q11, d28, d22		@ K = A*B4
	veor	q10, q10, q1		@ N = I + J
	veor	d16, d16, d17
	veor	d18, d18, d19
	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
	vand	d21, d21, d31
	vext.8	q8, q8, q8, #15
	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
	vmov.i64	d23, #0
	vext.8	q9, q9, q9, #14
	veor	d20, d20, d21
	vmull.p8	q1, d28, d6		@ D = A*B
	vext.8	q11, q11, q11, #12
	vext.8	q10, q10, q10, #13
	veor	q8, q8, q9
	veor	q10, q10, q11
	veor	q1, q1, q8
	veor	q1, q1, q10
	@ ---- q2 = d27 * d7 ----
	vext.8	d16, d27, d27, #1	@ A1
	vmull.p8	q8, d16, d7		@ F = A1*B
	vext.8	d4, d7, d7, #1	@ B1
	vmull.p8	q2, d27, d4		@ E = A*B1
	vext.8	d18, d27, d27, #2	@ A2
	vmull.p8	q9, d18, d7		@ H = A2*B
	vext.8	d22, d7, d7, #2	@ B2
	vmull.p8	q11, d27, d22		@ G = A*B2
	vext.8	d20, d27, d27, #3	@ A3
	veor	q8, q8, q2		@ L = E + F
	vmull.p8	q10, d20, d7		@ J = A3*B
	vext.8	d4, d7, d7, #3	@ B3
	veor	q9, q9, q11		@ M = G + H
	vmull.p8	q2, d27, d4		@ I = A*B3
	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
	vand	d17, d17, d29
	vext.8	d22, d7, d7, #4	@ B4
	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
	vand	d19, d19, d30
	vmull.p8	q11, d27, d22		@ K = A*B4
	veor	q10, q10, q2		@ N = I + J
	veor	d16, d16, d17
	veor	d18, d18, d19
	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
	vand	d21, d21, d31
	vext.8	q8, q8, q8, #15
	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
	vmov.i64	d23, #0
Murdoch vext.8 q9, q9, q9, #14 486116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor d20, d20, d21 487116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vmull.p8 q2, d27, d7 @ D = A*B 488116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vext.8 q11, q11, q11, #12 489116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vext.8 q10, q10, q10, #13 490116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q8, q8, q9 491116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q10, q10, q11 492116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q2, q2, q8 493116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q2, q2, q10 494116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q1,q1,q0 @ Karatsuba post-processing 495116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q1,q1,q2 496116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor d1,d1,d2 497116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor d4,d4,d3 @ Xh|Xl - 256-bit result 498116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 499116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch @ equivalent of reduction_avx from ghash-x86_64.pl 500116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vshl.i64 q9,q0,#57 @ 1st phase 501116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vshl.i64 q10,q0,#62 502116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q10,q10,q9 @ 503116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vshl.i64 q9,q0,#63 504116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q10, q10, q9 @ 505116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor d1,d1,d20 @ 506116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor d4,d4,d21 507116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 508116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vshr.u64 q10,q0,#1 @ 2nd phase 509116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q2,q2,q0 510116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q0,q0,q10 @ 511116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 
vshr.u64 q10,q10,#6 512116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vshr.u64 q0,q0,#1 @ 513116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q0,q0,q2 @ 514116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch veor q0,q0,q10 @ 515116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 516116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch subs r3,#16 517116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch bne .Loop_neon 518116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 519116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#ifdef __ARMEL__ 520116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vrev64.8 q0,q0 521116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif 522116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch sub r0,#16 523116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vst1.64 d1,[r0,:64]! @ write out Xi 524116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch vst1.64 d0,[r0,:64] 525116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 526116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch .word 0xe12fff1e 527116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.size gcm_ghash_neon,.-gcm_ghash_neon 528116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif 529116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>" 530116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.align 2 531116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 532116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif 533