/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala
11b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala#include <linux/linkage.h>
12b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala#include <asm/assembler.h>
13b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala
/*
 * Assembler setup and register aliases for the SHA-1 transform in this file.
 *
 *   k0-k3        round constants, one per vector, replicated into all four
 *                32-bit lanes
 *   t0, t1       scratch vectors holding "schedule words + round constant";
 *                the round macros alternate between the two
 *   dga/dgav     q and v views of v6: the first 16 bytes of the hash state
 *   dgb/dgbv     s and v views of v7: the 32-bit word at state offset 16
 *   dg0q/s/v     views of v12: working copy of dga that the rounds update
 *   dg1s/dg1v    views of v13: sha1h result written by the odd rounds
 *   dg2s         s view of v14: sha1h result written by the even rounds
 */
	.text
	.arch		armv8-a+crypto

	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	t0		.req	v4
	t1		.req	v5

	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14
36b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala
/*
 * add_only - issue one SHA-1 round instruction (sha1c/sha1p/sha1m) and
 * prepare the operands for the next one.
 *
 *   op   suffix appended to "sha1": c, p or m
 *   ev   the literal "ev" selects the even variant; any other text (the
 *        call sites use "od") selects the odd variant
 *   rc   vector register holding the round constant to add
 *   s0   number N of the vN register whose schedule words are pre-added;
 *        mandatory for "ev", optional for "od"
 *   dg1  optional scalar register used instead of dg1s as the second
 *        operand of the even-variant round instruction
 *
 * The two variants ping-pong between t0/t1 and dg1s/dg2s: each consumes the
 * sum and the sha1h result left behind by the other, and produces the pair
 * the other will consume next. Invocations must therefore alternate, and t0
 * must already be valid before the first even invocation.
 */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	/* even: build t1 for the following odd round, consume t0 now */
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	/* odd: build t0 for the following even round (if any), consume t1 */
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm
54b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala
/*
 * add_update - one add_only round step wrapped in a message schedule update.
 *
 *   op, ev, rc, dg1   forwarded unchanged to add_only
 *   s0                number of the v register that receives the next four
 *                     schedule words (sha1su0 followed by sha1su1)
 *   s1, s2            numbers of the v registers fed to sha1su0
 *   s3                number of the v register fed to sha1su1
 *
 * Note that add_only is given \s1, not \s0: the words pre-added to the round
 * constant are the ones in v\s1, while v\s0 is still being rewritten by the
 * sha1su0/sha1su1 pair around the round instruction.
 */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm
60b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala
/*
 * loadrc - place a 32-bit constant in every 32-bit lane of a vector.
 *
 *   k    destination vector with arrangement, e.g. k0.4s
 *   val  the 32-bit constant
 *   tmp  32-bit general purpose scratch register (clobbered)
 *
 * The SHA-1 round constants are built from immediates so that no data has
 * to be placed in, and read back from, the executable .text section.
 */
	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val	// bits [15:0]
	movk		\tmp, :abs_g1:\val	// bits [31:16]
	dup		\k, \tmp
	.endm

	/*
	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
	 * 			  u8 *head, long bytes)
	 *
	 * In:	w0 = blocks	number of 64-byte blocks to read from src
	 *	x1 = src	input data, advanced by 64 per block consumed
	 *	x2 = state	20 bytes of hash state, read on entry and
	 *			written back on exit
	 *	x3 = head	if non-zero, 64 bytes that are processed first;
	 *			this block does not decrement w0
	 *	x4 = bytes	if non-zero, total message length in bytes; a
	 *			padding block carrying the bit count is then
	 *			generated and processed after the input
	 *
	 * Clobbers w0, x1, x4, w6, x7, x8 and v0-v14.
	 *
	 * NOTE(review): v8-v14 are modified without being saved, although
	 * AAPCS64 treats the low halves of v8-v15 as callee-saved. Presumably
	 * every caller brackets this routine with kernel_neon_begin() and
	 * kernel_neon_end() - confirm against the C glue code.
	 *
	 * NOTE(review): with head == NULL and blocks == 0 the counter wraps
	 * at the first decrement; callers are assumed never to do that.
	 */
ENTRY(sha1_ce_transform)
	/* load round constants */
	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state */
	ldr		dga, [x2]
	ldr		dgb, [x2, #16]

	/* load partial state (if supplied) */
	cbz		x3, 0f
	ld1		{v8.4s-v11.4s}, [x3]
	b		1f

	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w0, w0, #1

	/* on little-endian builds, byte swap each 32-bit word of the block */
1:
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/*
	 * Prime t0 for the first (even) round step and start from a working
	 * copy of the state. The padding block built below re-enters here,
	 * skipping the byte swap above.
	 */
2:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/* five sha1c steps; the first takes dgb in place of dg1s */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	/* five sha1p steps */
	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	/* five sha1m steps */
	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	/*
	 * Five more sha1p steps. Only the first still extends the message
	 * schedule, and the last has no further sum to prepare.
	 */
	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	/* more input blocks to go? */
	cbnz		w0, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if we have no total byte count in x4. In that case, the input
	 * size was not a round multiple of the block size, and the padding is
	 * handled by the C code.
	 *
	 * The block is built directly in v8-v11 in the post-rev32 layout:
	 * word 0 is 0x80000000, words 1-13 are zero, and the last two words
	 * hold the upper and lower halves of the bit count. x4 is cleared so
	 * that the second pass through the check above falls out to 3.
	 */
	cbz		x4, 3f
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8
	mov		x4, #0
	mov		v11.d[0], xzr
	mov		v11.d[1], x7
	b		2b

	/* store new state */
3:	str		dga, [x2]
	str		dgb, [x2, #16]
	ret
ENDPROC(sha1_ce_transform)
154b0e2f248b4ed6aea3191c3419e6f70407d53d8d8Kumar Gala