1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "textflag.h"
6
7// SHA256 block routine. See sha256block.go for Go equivalent.
8//
9// The algorithm is detailed in FIPS 180-4:
10//
11//  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
12//
13// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
15//
16// a = H0
17// b = H1
18// c = H2
19// d = H3
20// e = H4
21// f = H5
22// g = H6
23// h = H7
24//
25// for t = 0 to 63 {
26//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
27//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
28//    h = g
29//    g = f
30//    f = e
31//    e = d + T1
32//    d = c
33//    c = b
34//    b = a
35//    a = T1 + T2
36// }
37//
38// H0 = a + H0
39// H1 = b + H1
40// H2 = c + H2
41// H3 = d + H3
42// H4 = e + H4
43// H5 = f + H5
44// H6 = g + H6
45// H7 = h + H7
46
// Wt = Mt; for 0 <= t <= 15
//
// MSGSCHEDULE0(index) loads the 32-bit word at byte offset index*4 from
// R26, reverses its byte order, and stores the result at byte offset
// index*4 from R27.
//
// The byte reversal is done in three steps: a rotate left by 24 puts two
// of the four bytes in their final positions in R11, and the two
// rotate-left-by-8 mask inserts (masks 0x00FF0000 and 0x000000FF) drop the
// remaining two bytes into place.
//
// Exit state: R7 = Wt (the byte-reversed word); the round macros rely on
// this. Clobbers R11.
#define MSGSCHEDULE0(index) \
	MOVWZ	(index*4)(R26), R7; \
	RLWNM	$24, R7, $-1, R11; \
	RLWMI	$8, R7, $0x00FF0000, R11; \
	RLWMI	$8, R7, $0x000000FF, R11; \
	MOVWZ	R11, R7; \
	MOVWZ	R7, (index*4)(R27)
55
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
//   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
//
// MSGSCHEDULE1(index) reads the schedule words at index-2, index-15,
// index-7 and index-16 (each a 32-bit word at byte offset 4*n from R27),
// combines them as in the formula above, and stores the new word at byte
// offset index*4 from R27.
//
// ROTR(n,x) is written as a rotate left by 32-n. The SIGMA1 chain
// (R7/R9/R10 on Wt-2) and the SIGMA0 chain (R8/R9/R10 on Wt-15) are
// interleaved, and R9/R10 are reused by the second chain once the first
// has consumed them, so the instruction order matters.
//
// Exit state: R7 = Wt; the round macros rely on this.
// Clobbers R8, R9, R10, R11.
#define MSGSCHEDULE1(index) \
	MOVWZ	((index-2)*4)(R27), R7; \
	MOVWZ	R7, R9; \
	RLWNM	$32-17, R7, $-1, R7; \
	MOVWZ	R9, R10; \
	RLWNM	$32-19, R9, $-1, R9; \
	SRW	$10, R10; \
	MOVWZ	((index-15)*4)(R27), R8; \
	XOR	R9, R7; \
	MOVWZ	R8, R9; \
	XOR	R10, R7; \
	RLWNM	$32-7, R8, $-1, R8; \
	MOVWZ	R9, R10; \
	SRW	$3, R10; \
	RLWNM	$32-18, R9, $-1, R9; \
	MOVWZ	((index-7)*4)(R27), R11; \
	ADD	R11, R7; \
	XOR	R9, R8; \
	XOR	R10, R8; \
	MOVWZ	((index-16)*4)(R27), R11; \
	ADD	R11, R8; \
	ADD	R8, R7; \
	MOVWZ	R7, ((index)*4)(R27)
82
// T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
//   BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
//   Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
//
// SHA256T1(const, e, f, g, h) computes T1 for one round.
//
// Entry state: R7 = Wt (as left by MSGSCHEDULE0 / MSGSCHEDULE1).
// const is the round constant Kt, added as an immediate.
//
// h is used as an accumulator: Wt, Kt and BIGSIGMA1(e) are added into it
// in turn, so its incoming value is destroyed. e, f and g are only read.
// NOT e is produced with NOR R7, R7, R7. ROTR(n,x) is written as a rotate
// left by 32-n.
//
// Exit state: R7 = T1. Clobbers R9, R10 and h.
#define SHA256T1(const, e, f, g, h) \
	ADD	R7, h; \
	MOVWZ	e, R7; \
	ADD	$const, h; \
	MOVWZ	e, R9; \
	RLWNM	$32-6, R7, $-1, R7; \
	MOVWZ	e, R10; \
	RLWNM	$32-11, R9, $-1, R9; \
	XOR	R9, R7; \
	MOVWZ	e, R9; \
	RLWNM	$32-25, R10, $-1, R10; \
	AND	f, R9; \
	XOR	R7, R10; \
	MOVWZ	e, R7; \
	NOR	R7, R7, R7; \
	ADD	R10, h; \
	AND	g, R7; \
	XOR	R9, R7; \
	ADD	h, R7
105
// T2 = BIGSIGMA0(a) + Maj(a, b, c)
//   BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
//   Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
//
// SHA256T2(a, b, c) computes T2 for one round. a, b and c are only read.
//
// BIGSIGMA0(a) is accumulated in R28 and Maj(a, b, c) in R8, with R9 and
// R10 as temporaries; the two computations are interleaved. ROTR(n,x) is
// written as a rotate left by 32-n.
//
// R7 is not touched, so a T1 value left there by SHA256T1 survives.
//
// Exit state: R8 = T2. Clobbers R9, R10, R28 (callers must not keep a
// live value in R28 across this macro).
#define SHA256T2(a, b, c) \
	MOVWZ	a, R28; \
	MOVWZ	c, R8; \
	RLWNM	$32-2, R28, $-1, R28; \
	MOVWZ	a, R10; \
	AND	b, R8; \
	RLWNM	$32-13, R10, $-1, R10; \
	MOVWZ	a, R9; \
	AND	c, R9; \
	XOR	R10, R28; \
	XOR	R9, R8; \
	MOVWZ	a, R10; \
	MOVWZ	b, R9; \
	RLWNM	$32-22, R10, $-1, R10; \
	AND	a, R9; \
	XOR	R9, R8; \
	XOR	R10, R28; \
	ADD	R28, R8
127
// One SHA-256 compression round.
//
// SHA256T1 leaves T1 in R7 and SHA256T2 leaves T2 in R8. The round then
// finishes in place:
//   d = d + T1   (this register becomes the next round's e)
//   h = T2 + T1  (this register becomes the next round's a)
// No values are moved between the other working registers; the caller
// rotates the register arguments by one position per round instead.
//
// Entry state: R7 = Wt. The index argument is not referenced by the
// expansion.
#define SHA256ROUND(index, const, a, b, c, d, e, f, g, h) \
	SHA256T1(const, e, f, g, h); \
	SHA256T2(a, b, c); \
	ADD	R7, d; \
	MOVWZ	R8, h; \
	ADD	R7, h
136
// SHA256ROUND0 is a full round that takes its schedule word straight from
// the input: MSGSCHEDULE0(index) leaves Wt in R7, which SHA256ROUND then
// consumes.
#define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE0(index); \
	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
140
// SHA256ROUND1 is a full round that derives its schedule word from earlier
// schedule entries: MSGSCHEDULE1(index) leaves Wt in R7, which SHA256ROUND
// then consumes.
#define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(index); \
	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
144
// func block(dig *digest, p []byte)
//
// block folds every complete 64-byte chunk of p into the eight 32-bit
// words at the start of *dig (H0..H7). The length of p is rounded down to
// a multiple of 64, so trailing bytes that do not fill a chunk are
// ignored; if there is no complete chunk, *dig is not touched.
//
// Register use:
//   R26          current read position in p
//   R27          dig while H0..H7 are loaded/stored; base of the message
//                schedule while the rounds run
//   R28, R29     set-up temporaries (end pointer, rounded length);
//                R28 is later scratch for SHA256T2
//   R14-R21      working variables a..h
//   R7-R11       scratch for the schedule and round macros
//
// Frame use (offsets from R1):
//   0..255       message schedule, 64 words of 4 bytes
//   256          end-of-input pointer
TEXT ·block(SB),0,$296-32
	MOVD	p_base+8(FP), R26
	MOVD	p_len+16(FP), R29
	SRD	$6, R29		// shift right then left by 6:
	SLD	$6, R29		// R29 = len(p) rounded down to a multiple of 64

	ADD	R26, R29, R28	// R28 = end of the last complete chunk

	// Spill the end pointer: SHA256T2 uses R28 as scratch.
	MOVD	R28, 256(R1)
	CMP	R26, R28
	BEQ	end		// no complete chunk

	MOVD	dig+0(FP), R27
	MOVWZ	(0*4)(R27), R14		// a = H0
	MOVWZ	(1*4)(R27), R15		// b = H1
	MOVWZ	(2*4)(R27), R16		// c = H2
	MOVWZ	(3*4)(R27), R17		// d = H3
	MOVWZ	(4*4)(R27), R18		// e = H4
	MOVWZ	(5*4)(R27), R19		// f = H5
	MOVWZ	(6*4)(R27), R20		// g = H6
	MOVWZ	(7*4)(R27), R21		// h = H7

loop:
	// NOTE(review): the schedule starts at 0(R1); confirm this does not
	// overlap anything the toolchain keeps at the bottom of the frame.
	MOVD	R1, R27		// R27: message schedule

	// Rounds 0-15: Wt comes directly from the input chunk at R26.
	// Each round shifts the register arguments by one position, which
	// renames a..h without moving any data.
	SHA256ROUND0(0, 0x428a2f98, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND0(1, 0x71374491, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND0(2, 0xb5c0fbcf, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND0(3, 0xe9b5dba5, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND0(4, 0x3956c25b, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND0(5, 0x59f111f1, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND0(6, 0x923f82a4, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND0(7, 0xab1c5ed5, R15, R16, R17, R18, R19, R20, R21, R14)
	SHA256ROUND0(8, 0xd807aa98, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND0(9, 0x12835b01, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND0(10, 0x243185be, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND0(11, 0x550c7dc3, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND0(12, 0x72be5d74, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND0(13, 0x80deb1fe, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND0(14, 0x9bdc06a7, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND0(15, 0xc19bf174, R15, R16, R17, R18, R19, R20, R21, R14)

	// Rounds 16-63: Wt is derived from earlier schedule entries.
	SHA256ROUND1(16, 0xe49b69c1, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND1(17, 0xefbe4786, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND1(18, 0x0fc19dc6, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND1(19, 0x240ca1cc, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND1(20, 0x2de92c6f, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND1(21, 0x4a7484aa, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND1(22, 0x5cb0a9dc, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND1(23, 0x76f988da, R15, R16, R17, R18, R19, R20, R21, R14)
	SHA256ROUND1(24, 0x983e5152, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND1(25, 0xa831c66d, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND1(26, 0xb00327c8, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND1(27, 0xbf597fc7, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND1(28, 0xc6e00bf3, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND1(29, 0xd5a79147, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND1(30, 0x06ca6351, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND1(31, 0x14292967, R15, R16, R17, R18, R19, R20, R21, R14)
	SHA256ROUND1(32, 0x27b70a85, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND1(33, 0x2e1b2138, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND1(34, 0x4d2c6dfc, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND1(35, 0x53380d13, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND1(36, 0x650a7354, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND1(37, 0x766a0abb, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND1(38, 0x81c2c92e, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND1(39, 0x92722c85, R15, R16, R17, R18, R19, R20, R21, R14)
	SHA256ROUND1(40, 0xa2bfe8a1, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND1(41, 0xa81a664b, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND1(42, 0xc24b8b70, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND1(43, 0xc76c51a3, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND1(44, 0xd192e819, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND1(45, 0xd6990624, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND1(46, 0xf40e3585, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND1(47, 0x106aa070, R15, R16, R17, R18, R19, R20, R21, R14)
	SHA256ROUND1(48, 0x19a4c116, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND1(49, 0x1e376c08, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND1(50, 0x2748774c, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND1(51, 0x34b0bcb5, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND1(52, 0x391c0cb3, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND1(53, 0x4ed8aa4a, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND1(54, 0x5b9cca4f, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND1(55, 0x682e6ff3, R15, R16, R17, R18, R19, R20, R21, R14)
	SHA256ROUND1(56, 0x748f82ee, R14, R15, R16, R17, R18, R19, R20, R21)
	SHA256ROUND1(57, 0x78a5636f, R21, R14, R15, R16, R17, R18, R19, R20)
	SHA256ROUND1(58, 0x84c87814, R20, R21, R14, R15, R16, R17, R18, R19)
	SHA256ROUND1(59, 0x8cc70208, R19, R20, R21, R14, R15, R16, R17, R18)
	SHA256ROUND1(60, 0x90befffa, R18, R19, R20, R21, R14, R15, R16, R17)
	SHA256ROUND1(61, 0xa4506ceb, R17, R18, R19, R20, R21, R14, R15, R16)
	SHA256ROUND1(62, 0xbef9a3f7, R16, R17, R18, R19, R20, R21, R14, R15)
	SHA256ROUND1(63, 0xc67178f2, R15, R16, R17, R18, R19, R20, R21, R14)

	// 64 rounds is a multiple of 8, so a..h are back in R14..R21.
	// R27 was repurposed as the schedule base; reload dig before
	// folding the working variables into the hash state.
	MOVD	dig+0(FP), R27
	MOVWZ	(0*4)(R27), R11
	ADD	R11, R14	// H0 = a + H0
	MOVWZ	R14, (0*4)(R27)
	MOVWZ	(1*4)(R27), R11
	ADD	R11, R15	// H1 = b + H1
	MOVWZ	R15, (1*4)(R27)
	MOVWZ	(2*4)(R27), R11
	ADD	R11, R16	// H2 = c + H2
	MOVWZ	R16, (2*4)(R27)
	MOVWZ	(3*4)(R27), R11
	ADD	R11, R17	// H3 = d + H3
	MOVWZ	R17, (3*4)(R27)
	MOVWZ	(4*4)(R27), R11
	ADD	R11, R18	// H4 = e + H4
	MOVWZ	R18, (4*4)(R27)
	MOVWZ	(5*4)(R27), R11
	ADD	R11, R19	// H5 = f + H5
	MOVWZ	R19, (5*4)(R27)
	MOVWZ	(6*4)(R27), R11
	ADD	R11, R20	// H6 = g + H6
	MOVWZ	R20, (6*4)(R27)
	MOVWZ	(7*4)(R27), R11
	ADD	R11, R21	// H7 = h + H7
	MOVWZ	R21, (7*4)(R27)

	ADD	$64, R26		// advance to the next chunk
	MOVD	256(R1), R11	// reload the spilled end pointer
	CMPU	R26, R11		// unsigned pointer compare
	BLT	loop

end:
	RET
270