@ File preamble: the whole file is assembled only for 32-bit ARM targets.
@ arm_arch.h is presumably where the architecture level macro tested by
@ the conditional blocks further down is defined -- confirm in that header.
#if defined(__arm__)
#include "arm_arch.h"

.syntax unified

.text
.code	32
8116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
@ rem_4bit: table of 16 halfword constants, one for each value of the
@ 4 bits that fall off the low end of the accumulator in every step of
@ the 4-bit routines in this file. Readers fetch an entry with ldrh at
@ byte offset 2*rem and XOR it, shifted left by 16, into the accumulator
@ word kept in r7.
@
@ Layout constraint: rem_4bit_get and gcm_ghash_4bit do not reference
@ this symbol by name. They compute its address from pc with the fixed
@ distances 32 and 48, so the table must stay exactly 32 bytes long and
@ nothing may be inserted between it and those two routines.
.type	rem_4bit,%object
.align	5
rem_4bit:
.short	0x0000,0x1C20,0x3840,0x2460
.short	0x7080,0x6CA0,0x48C0,0x54E0
.short	0xE100,0xFD20,0xD940,0xC560
.short	0x9180,0x8DA0,0xA9C0,0xB5E0
.size	rem_4bit,.-rem_4bit
17116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
@ rem_4bit_get: internal helper of gcm_gmult_4bit, not a regular call
@ target.
@ Out:  r2 = &rem_4bit
@ It is reached with a plain branch and leaves with a plain branch to
@ .Lrem_4bit_got inside gcm_gmult_4bit, so lr is neither read nor
@ written here.
@ In ARM state pc reads as the address of the current instruction plus
@ 8, hence the first sub produces the address of rem_4bit_get itself.
@ The 32-byte table sits directly in front of it.
@ The trailing nop keeps the helper 16 bytes long, which is what makes
@ the distance 48 used by gcm_ghash_4bit come out right.
.type	rem_4bit_get,%function
rem_4bit_get:
	sub	r2,pc,#8		@ r2 = address of rem_4bit_get
	sub	r2,r2,#32	@ &rem_4bit
	b	.Lrem_4bit_got
	nop
.size	rem_4bit_get,.-rem_4bit_get
25116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
@ gcm_ghash_4bit: fold a run of 16-byte input blocks into the 16-byte
@ hash state using the 4-bit table method.
@
@ In:   r0 = Xi    16-byte state, read byte-wise and rewritten per block
@       r1 = Htbl  table of 16-byte entries, indexed by a nibble
@       r2 = inp   input bytes
@       r3 = len   byte count
@ Presumably the C view is
@   void gcm_ghash_4bit(u64 Xi[2], const u128 Htbl[16],
@                       const u8 *inp, size_t len)
@ -- confirm against the caller.
@ Out:  Xi updated in place. r4-r11 are restored on exit.
@
@ Precondition: the outer loop is a do-while that advances r2 by 16 and
@ stops only when r2 equals inp+len, so len has to be a nonzero multiple
@ of 16.
@
@ Stack frame after the prologue:
@   sp+0  .. sp+31   private copy of rem_4bit
@   sp+32            saved r3 (end of input)
@   sp+36 ..         saved r4-r11, lr
@
@ Working registers: r4-r7 hold the 128-bit accumulator, r8-r11 the
@ table entry being folded in, r12 and r14 the low and high nibble
@ selectors, r3 the byte index inside the current block.
.global	gcm_ghash_4bit
.hidden	gcm_ghash_4bit
.type	gcm_ghash_4bit,%function
gcm_ghash_4bit:
	sub	r12,pc,#8		@ r12 = address of gcm_ghash_4bit
	add	r3,r2,r3		@ r3 to point at the end
	stmdb	sp!,{r3-r11,lr}		@ save r3/end too
	sub	r12,r12,#48		@ &rem_4bit

	ldmia	r12,{r4-r11}		@ copy rem_4bit ...
	stmdb	sp!,{r4-r11}		@ ... to stack

	ldrb	r12,[r2,#15]
	ldrb	r14,[r0,#15]
.Louter:
	@ First byte of the block (index 15): split inp^Xi into nibbles.
	eor	r12,r12,r14
	and	r14,r12,#0xf0
	and	r12,r12,#0x0f
	mov	r3,#14

	add	r7,r1,r12,lsl#4
	ldmia	r7,{r4-r7}	@ load Htbl[nlo]
	add	r11,r1,r14
	ldrb	r12,[r2,#14]

	and	r14,r4,#0xf		@ rem
	ldmia	r11,{r8-r11}	@ load Htbl[nhi]
	add	r14,r14,r14
	eor	r4,r8,r4,lsr#4
	ldrh	r8,[sp,r14]		@ rem_4bit[rem]
	eor	r4,r4,r5,lsl#28
	ldrb	r14,[r0,#14]
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4
	eor	r12,r12,r14
	and	r14,r12,#0xf0
	and	r12,r12,#0x0f
	eor	r7,r7,r8,lsl#16

.Linner:
	@ One pass per remaining byte. subs sets the flags that every pl
	@ instruction below tests. Once r3 goes negative the loads of the
	@ next byte are skipped and the loop falls through.
	add	r11,r1,r12,lsl#4
	and	r12,r4,#0xf		@ rem
	subs	r3,r3,#1
	add	r12,r12,r12
	ldmia	r11,{r8-r11}	@ load Htbl[nlo]
	eor	r4,r8,r4,lsr#4
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	ldrh	r8,[sp,r12]		@ rem_4bit[rem]
	eor	r6,r10,r6,lsr#4
	ldrbpl	r12,[r2,r3]
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4

	add	r11,r1,r14
	and	r14,r4,#0xf		@ rem
	eor	r7,r7,r8,lsl#16	@ ^= rem_4bit[rem]
	add	r14,r14,r14
	ldmia	r11,{r8-r11}	@ load Htbl[nhi]
	eor	r4,r8,r4,lsr#4
	ldrbpl	r8,[r0,r3]
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	ldrh	r9,[sp,r14]
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eorpl	r12,r12,r8
	eor	r7,r11,r7,lsr#4
	andpl	r14,r12,#0xf0
	andpl	r12,r12,#0x0f
	eor	r7,r7,r9,lsl#16	@ ^= rem_4bit[rem]
	bpl	.Linner

	ldr	r3,[sp,#32]		@ re-load r3/end
	add	r2,r2,#16
	mov	r14,r4			@ low byte of r4 is the new Xi[15] for .Louter
	@ Write the accumulator back to Xi in big-endian byte order. Three
	@ variants: byte-reverse then word store, plain word store on a
	@ big-endian build, or four byte stores as the generic fallback.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r4,r4
	str	r4,[r0,#12]
#elif defined(__ARMEB__)
	str	r4,[r0,#12]
#else
	mov	r9,r4,lsr#8
	strb	r4,[r0,#12+3]
	mov	r10,r4,lsr#16
	strb	r9,[r0,#12+2]
	mov	r11,r4,lsr#24
	strb	r10,[r0,#12+1]
	strb	r11,[r0,#12]
#endif
	@ Flags from this cmp stay live down to ldrbne and bne .Louter.
	@ Nothing in between sets flags.
	cmp	r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r5,r5
	str	r5,[r0,#8]
#elif defined(__ARMEB__)
	str	r5,[r0,#8]
#else
	mov	r9,r5,lsr#8
	strb	r5,[r0,#8+3]
	mov	r10,r5,lsr#16
	strb	r9,[r0,#8+2]
	mov	r11,r5,lsr#24
	strb	r10,[r0,#8+1]
	strb	r11,[r0,#8]
#endif
	ldrbne	r12,[r2,#15]		@ more input: fetch last byte of next block
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r6,r6
	str	r6,[r0,#4]
#elif defined(__ARMEB__)
	str	r6,[r0,#4]
#else
	mov	r9,r6,lsr#8
	strb	r6,[r0,#4+3]
	mov	r10,r6,lsr#16
	strb	r9,[r0,#4+2]
	mov	r11,r6,lsr#24
	strb	r10,[r0,#4+1]
	strb	r11,[r0,#4]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r7,r7
	str	r7,[r0,#0]
#elif defined(__ARMEB__)
	str	r7,[r0,#0]
#else
	mov	r9,r7,lsr#8
	strb	r7,[r0,#0+3]
	mov	r10,r7,lsr#16
	strb	r9,[r0,#0+2]
	mov	r11,r7,lsr#24
	strb	r10,[r0,#0+1]
	strb	r11,[r0,#0]
#endif

	bne	.Louter

	add	sp,sp,#36		@ drop rem_4bit copy (32) and saved r3 (4)
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	gcm_ghash_4bit,.-gcm_ghash_4bit
179116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
@ gcm_gmult_4bit: one multiplication of the 16-byte state by the value
@ represented by Htbl, using the 4-bit table method.
@
@ In:   r0 = Xi    16-byte state, read byte-wise and rewritten
@       r1 = Htbl  table of 16-byte entries, indexed by a nibble
@ Presumably the C view is
@   void gcm_gmult_4bit(u64 Xi[2], const u128 Htbl[16])
@ -- confirm against the caller.
@ Out:  Xi updated in place. r4-r11 are restored on exit.
@       r2, r3, r12 are overwritten.
@
@ r2 is loaded with &rem_4bit by a detour through rem_4bit_get, which
@ branches straight back to .Lrem_4bit_got. Here the table is read in
@ place through r2 and is not copied to the stack.
@
@ Working registers: r4-r7 hold the 128-bit accumulator, r8-r11 the
@ table entry being folded in, r12 and r14 the low and high nibble
@ selectors, r3 the byte index counting down from 14.
.global	gcm_gmult_4bit
.hidden	gcm_gmult_4bit
.type	gcm_gmult_4bit,%function
gcm_gmult_4bit:
	stmdb	sp!,{r4-r11,lr}
	ldrb	r12,[r0,#15]
	b	rem_4bit_get		@ sets r2 = &rem_4bit, resumes just below
.Lrem_4bit_got:
	and	r14,r12,#0xf0
	and	r12,r12,#0x0f
	mov	r3,#14

	add	r7,r1,r12,lsl#4
	ldmia	r7,{r4-r7}	@ load Htbl[nlo]
	ldrb	r12,[r0,#14]

	add	r11,r1,r14
	and	r14,r4,#0xf		@ rem
	ldmia	r11,{r8-r11}	@ load Htbl[nhi]
	add	r14,r14,r14
	eor	r4,r8,r4,lsr#4
	ldrh	r8,[r2,r14]	@ rem_4bit[rem]
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4
	and	r14,r12,#0xf0
	eor	r7,r7,r8,lsl#16
	and	r12,r12,#0x0f

.Loop:
	@ One pass per remaining byte. subs sets the flags that every pl
	@ instruction below tests. Once r3 goes negative the load of the
	@ next byte is skipped and the loop falls through.
	add	r11,r1,r12,lsl#4
	and	r12,r4,#0xf		@ rem
	subs	r3,r3,#1
	add	r12,r12,r12
	ldmia	r11,{r8-r11}	@ load Htbl[nlo]
	eor	r4,r8,r4,lsr#4
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	eor	r5,r5,r6,lsl#28
	ldrh	r8,[r2,r12]	@ rem_4bit[rem]
	eor	r6,r10,r6,lsr#4
	ldrbpl	r12,[r0,r3]
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4

	add	r11,r1,r14
	and	r14,r4,#0xf		@ rem
	eor	r7,r7,r8,lsl#16	@ ^= rem_4bit[rem]
	add	r14,r14,r14
	ldmia	r11,{r8-r11}	@ load Htbl[nhi]
	eor	r4,r8,r4,lsr#4
	eor	r4,r4,r5,lsl#28
	eor	r5,r9,r5,lsr#4
	ldrh	r8,[r2,r14]	@ rem_4bit[rem]
	eor	r5,r5,r6,lsl#28
	eor	r6,r10,r6,lsr#4
	eor	r6,r6,r7,lsl#28
	eor	r7,r11,r7,lsr#4
	andpl	r14,r12,#0xf0
	andpl	r12,r12,#0x0f
	eor	r7,r7,r8,lsl#16	@ ^= rem_4bit[rem]
	bpl	.Loop
	@ Write the accumulator back to Xi in big-endian byte order. Three
	@ variants: byte-reverse then word store, plain word store on a
	@ big-endian build, or four byte stores as the generic fallback.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r4,r4
	str	r4,[r0,#12]
#elif defined(__ARMEB__)
	str	r4,[r0,#12]
#else
	mov	r9,r4,lsr#8
	strb	r4,[r0,#12+3]
	mov	r10,r4,lsr#16
	strb	r9,[r0,#12+2]
	mov	r11,r4,lsr#24
	strb	r10,[r0,#12+1]
	strb	r11,[r0,#12]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r5,r5
	str	r5,[r0,#8]
#elif defined(__ARMEB__)
	str	r5,[r0,#8]
#else
	mov	r9,r5,lsr#8
	strb	r5,[r0,#8+3]
	mov	r10,r5,lsr#16
	strb	r9,[r0,#8+2]
	mov	r11,r5,lsr#24
	strb	r10,[r0,#8+1]
	strb	r11,[r0,#8]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r6,r6
	str	r6,[r0,#4]
#elif defined(__ARMEB__)
	str	r6,[r0,#4]
#else
	mov	r9,r6,lsr#8
	strb	r6,[r0,#4+3]
	mov	r10,r6,lsr#16
	strb	r9,[r0,#4+2]
	mov	r11,r6,lsr#24
	strb	r10,[r0,#4+1]
	strb	r11,[r0,#4]
#endif

#if __ARM_ARCH__>=7 && defined(__ARMEL__)
	rev	r7,r7
	str	r7,[r0,#0]
#elif defined(__ARMEB__)
	str	r7,[r0,#0]
#else
	mov	r9,r7,lsr#8
	strb	r7,[r0,#0+3]
	mov	r10,r7,lsr#16
	strb	r9,[r0,#0+2]
	mov	r11,r7,lsr#24
	strb	r10,[r0,#0+1]
	strb	r11,[r0,#0]
#endif

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	gcm_gmult_4bit,.-gcm_gmult_4bit
@ The routines that follow use NEON instructions and are assembled only
@ when the architecture level is 7 or higher.
#if __ARM_ARCH__>=7
.fpu	neon
316116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
@ gcm_init_neon: derive the value the comments below call twisted H
@ from the hash key H and store it for the NEON multiply routines.
@
@ In:   r0 = destination, 16 bytes are written
@       r1 = H, two 64-bit words, loaded with a 64-bit alignment hint
@ Out:  16 bytes at r0. r1 is advanced by 8 by the first load.
@ Touches q3, q8, q9 and d26 only. No stack use.
@
@ Steps: shift the 128-bit value left by one bit, carrying bit 63 of
@ the low half into the high half, then XOR in the constant built in q8
@ if the top bit of H was set. The mask in q9 is that top bit broadcast
@ to every byte.
.global	gcm_init_neon
.hidden	gcm_init_neon
.type	gcm_init_neon,%function
.align	4
gcm_init_neon:
	vld1.64		d7,[r1,:64]!	@ load H
	vmov.i8		q8,#0xe1
	vld1.64		d6,[r1,:64]
	vshl.i64	d17,#57
	vshr.u64	d16,#63		@ t0=0xc2....01
	vdup.8		q9,d7[7]
	vshr.u64	d26,d6,#63
	vshr.s8		q9,#7			@ broadcast carry bit
	vshl.i64	q3,q3,#1
	vand		q8,q8,q9
	vorr		d7,d26		@ H<<<=1
	veor		q3,q3,q8		@ twisted H
	vstmia		r0,{q3}

	.word	0xe12fff1e		@ encoding of bx lr
.size	gcm_init_neon,.-gcm_init_neon
338116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
@ gcm_gmult_neon: entry stub for a single NEON multiplication of Xi by
@ the twisted H value. It only sets up registers and then branches into
@ the shared code at .Lgmult_neon inside gcm_ghash_neon. It does not
@ return by itself.
@
@ In:   r0 = Xi, 16 bytes, loaded with a 64-bit alignment hint
@       r1 = twisted H, 16 bytes
@ State handed to .Lgmult_neon:
@       q3       = Xi, byte-reversed per 64-bit half on little-endian
@       d26-d27  = twisted H
@       d28      = d26 ^ d27
@       d29-d31  = the 48-bit, 32-bit and 16-bit low masks
@       r0       = advanced by 16 by the two post-incremented loads
@       r3       = 16, presumably a remaining-byte count that makes the
@                  shared code run once. Its use lies past this routine,
@                  confirm at the tail of gcm_ghash_neon.
.global	gcm_gmult_neon
.hidden	gcm_gmult_neon
.type	gcm_gmult_neon,%function
.align	4
gcm_gmult_neon:
	vld1.64		d7,[r0,:64]!	@ load Xi
	vld1.64		d6,[r0,:64]!
	vmov.i64	d29,#0x0000ffffffffffff
	vldmia		r1,{d26-d27}	@ load twisted H
	vmov.i64	d30,#0x00000000ffffffff
#ifdef __ARMEL__
	vrev64.8	q3,q3
#endif
	vmov.i64	d31,#0x000000000000ffff
	veor		d28,d26,d27		@ Karatsuba pre-processing
	mov		r3,#16
	b		.Lgmult_neon
.size	gcm_gmult_neon,.-gcm_gmult_neon
357116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
358116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.global	gcm_ghash_neon
3595f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles).hidden	gcm_ghash_neon
360116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.type	gcm_ghash_neon,%function
361116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.align	4
362116680a4aac90f2aa7413d9095a592090648e557Ben Murdochgcm_ghash_neon:
363116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vld1.64		d1,[r0,:64]!	@ load Xi
364116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vld1.64		d0,[r0,:64]!
365116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmov.i64	d29,#0x0000ffffffffffff
366116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vldmia		r1,{d26-d27}	@ load twisted H
367116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmov.i64	d30,#0x00000000ffffffff
368116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#ifdef __ARMEL__
369116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vrev64.8	q0,q0
370116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif
371116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmov.i64	d31,#0x000000000000ffff
372116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d28,d26,d27		@ Karatsuba pre-processing
373116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
374116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.Loop_neon:
375116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vld1.64		d7,[r2]!		@ load inp
376116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vld1.64		d6,[r2]!
377116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#ifdef __ARMEL__
378116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vrev64.8	q3,q3
379116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif
380116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q3,q0			@ inp^=Xi
381116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.Lgmult_neon:
382116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d16, d26, d26, #1	@ A1
383116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q8, d16, d6		@ F = A1*B
384116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d0, d6, d6, #1	@ B1
385116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q0, d26, d0		@ E = A*B1
386116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d18, d26, d26, #2	@ A2
387116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q9, d18, d6		@ H = A2*B
388116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d22, d6, d6, #2	@ B2
389116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q11, d26, d22		@ G = A*B2
390116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d20, d26, d26, #3	@ A3
391116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q8, q8, q0		@ L = E + F
392116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q10, d20, d6		@ J = A3*B
393116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d0, d6, d6, #3	@ B3
394116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q9, q9, q11		@ M = G + H
395116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q0, d26, d0		@ I = A*B3
396116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
397116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d17, d17, d29
398116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d22, d6, d6, #4	@ B4
399116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
400116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d19, d19, d30
401116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q11, d26, d22		@ K = A*B4
402116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10, q10, q0		@ N = I + J
403116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d16, d16, d17
404116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d18, d18, d19
405116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
406116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d21, d21, d31
407116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q8, q8, q8, #15
408116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
409116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmov.i64	d23, #0
410116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q9, q9, q9, #14
411116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d20, d20, d21
412116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q0, d26, d6		@ D = A*B
413116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q11, q11, q11, #12
414116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q10, q10, q10, #13
415116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q8, q8, q9
416116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10, q10, q11
417116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q0, q0, q8
418116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q0, q0, q10
419116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d6,d6,d7	@ Karatsuba pre-processing
420116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d16, d28, d28, #1	@ A1
421116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q8, d16, d6		@ F = A1*B
422116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d2, d6, d6, #1	@ B1
423116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q1, d28, d2		@ E = A*B1
424116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d18, d28, d28, #2	@ A2
425116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q9, d18, d6		@ H = A2*B
426116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d22, d6, d6, #2	@ B2
427116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q11, d28, d22		@ G = A*B2
428116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d20, d28, d28, #3	@ A3
429116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q8, q8, q1		@ L = E + F
430116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q10, d20, d6		@ J = A3*B
431116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d2, d6, d6, #3	@ B3
432116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q9, q9, q11		@ M = G + H
433116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q1, d28, d2		@ I = A*B3
434116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
435116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d17, d17, d29
436116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d22, d6, d6, #4	@ B4
437116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
438116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d19, d19, d30
439116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q11, d28, d22		@ K = A*B4
440116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10, q10, q1		@ N = I + J
441116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d16, d16, d17
442116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d18, d18, d19
443116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
444116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d21, d21, d31
445116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q8, q8, q8, #15
446116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
447116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmov.i64	d23, #0
448116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q9, q9, q9, #14
449116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d20, d20, d21
450116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q1, d28, d6		@ D = A*B
451116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q11, q11, q11, #12
452116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q10, q10, q10, #13
453116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q8, q8, q9
454116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10, q10, q11
455116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q1, q1, q8
456116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q1, q1, q10
457116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d16, d27, d27, #1	@ A1
458116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q8, d16, d7		@ F = A1*B
459116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d4, d7, d7, #1	@ B1
460116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q2, d27, d4		@ E = A*B1
461116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d18, d27, d27, #2	@ A2
462116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q9, d18, d7		@ H = A2*B
463116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d22, d7, d7, #2	@ B2
464116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q11, d27, d22		@ G = A*B2
465116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d20, d27, d27, #3	@ A3
466116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q8, q8, q2		@ L = E + F
467116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q10, d20, d7		@ J = A3*B
468116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d4, d7, d7, #3	@ B3
469116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q9, q9, q11		@ M = G + H
470116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q2, d27, d4		@ I = A*B3
471116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
472116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d17, d17, d29
473116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		d22, d7, d7, #4	@ B4
474116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
475116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d19, d19, d30
476116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q11, d27, d22		@ K = A*B4
477116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10, q10, q2		@ N = I + J
478116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d16, d16, d17
479116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d18, d18, d19
480116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
481116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vand		d21, d21, d31
482116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q8, q8, q8, #15
483116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
484116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmov.i64	d23, #0
485116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q9, q9, q9, #14
486116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d20, d20, d21
487116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vmull.p8	q2, d27, d7		@ D = A*B
488116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q11, q11, q11, #12
489116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vext.8		q10, q10, q10, #13
490116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q8, q8, q9
491116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10, q10, q11
492116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q2, q2, q8
493116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q2, q2, q10
494116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q1,q1,q0		@ Karatsuba post-processing
495116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q1,q1,q2
496116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d1,d1,d2
497116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d4,d4,d3	@ Xh|Xl - 256-bit result
498116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
499116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	@ equivalent of reduction_avx from ghash-x86_64.pl
500116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vshl.i64	q9,q0,#57		@ 1st phase
501116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vshl.i64	q10,q0,#62
502116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10,q10,q9		@
503116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vshl.i64	q9,q0,#63
504116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q10, q10, q9		@
505116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 	veor		d1,d1,d20	@
506116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		d4,d4,d21
507116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
508116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vshr.u64	q10,q0,#1		@ 2nd phase
509116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q2,q2,q0
510116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q0,q0,q10		@
511116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vshr.u64	q10,q10,#6
512116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vshr.u64	q0,q0,#1		@
513116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q0,q0,q2		@
514116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	veor		q0,q0,q10		@
515116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
516116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	subs		r3,#16
517116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	bne		.Loop_neon
518116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
519116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#ifdef __ARMEL__
520116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vrev64.8	q0,q0
521116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif
522116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	sub		r0,#16
523116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vst1.64		d1,[r0,:64]!	@ write out Xi
524116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	vst1.64		d0,[r0,:64]
525116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
526116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch	.word	0xe12fff1e
527116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.size	gcm_ghash_neon,.-gcm_ghash_neon
528116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif
@ Identification banner: .asciz stores the text followed by a terminating
@ zero byte. It sits after the #endif on the preceding line, so it is not
@ governed by the conditional that #endif closes.
529116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.asciz  "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
530116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch.align  2	@ on ARM the operand is a power of two: pad to a 4-byte boundary after the string
531116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
532116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#endif
533