md5block_s390x.s revision 4f7f559a4b744258a796dd591b11bd88e4a6dc7a
// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 adapted for s390x using Go's assembler for
// s390x, based on md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

#include "textflag.h"

// func block(dig *digest, p []byte)
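//
// Register usage in the code below: R1 holds the digest pointer and is
// later reused as scratch, R2-R5 hold the four state words a, b, c, d,
// R6 walks the message, R7 marks the end of the whole 64-byte blocks,
// R8 holds the prefetched message word, and R9 (plus R1 in round 2)
// holds scratch values for the nonlinear round functions.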
TEXT ·block(SB),NOSPLIT,$16-32
	MOVD	dig+0(FP), R1
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	AND	$-64, R5
	LAY	(R6)(R5*1), R7

	LMY	0(R1), R2, R5
	CMPBEQ	R6, R7, end

loop:
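	// Save the current state on the stack so it can be added back
	// into the new state after the 64 rounds (MD5's feed-forward).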
	STMY	R2, R5, tmp-16(SP)

	MOVWBR	0(R6), R8
	MOVWZ	R5, R9

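// ROUND1 performs one step of round 1:
//	a = b + ((a + F(b,c,d) + x[i] + const) <<< shift)
// with F(b,c,d) = (b AND c) OR (NOT b AND d), computed here in the
// equivalent form d XOR (b AND (c XOR d)). On entry R8 holds the current
// message word and R9 holds d; the macro prefetches the word for the
// next step (byte-reversed, since the input is little-endian and s390x
// is big-endian) and leaves c in R9, which is the next step's d.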
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	AND	b, R9; \
	XOR	d, R9; \
	ADD	R9, a; \
	RLL	$shift, a; \
	MOVWZ	c, R9; \
	ADD	b, a

	ROUND1(R2,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R2,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R2,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R2, 4,0xc1bdceee,22);
	ROUND1(R2,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R2,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R2,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R2, 8,0xfd469501,22);
	ROUND1(R2,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R2,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R2,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R2,12,0x895cd7be,22);
	ROUND1(R2,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R2,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R2,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R2, 0,0x49b40821,22);

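	// Round 2 starts with message word x[1]; seed R8 with it (the word
	// prefetched by the final ROUND1 is simply overwritten here) and
	// both scratch registers R9 and R1 with the initial d (R5).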
	MOVWBR	(1*4)(R6), R8
	MOVWZ	R5, R9
	MOVWZ	R5, R1

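// ROUND2 performs one step of round 2 using
//	G(b,c,d) = (b AND d) OR (c AND NOT d),
// built from two scratch registers: on entry R9 holds NOT d and R1
// holds d; the XOR with $0xffffffff stands in for a 32-bit NOT. As in
// round 1, the next message word is prefetched and R9/R1 are reloaded
// with c, the next step's d.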
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	AND	b, R1; \
	AND	c, R9; \
	OR	R9, R1; \
	MOVWZ	c, R9; \
	ADD	R1, a; \
	MOVWZ	c, R1; \
	RLL	$shift,	a; \
	ADD	b, a

	ROUND2(R2,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R2,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R2,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R2, 5,0xe9b6c7aa,20);
	ROUND2(R2,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R2,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R2,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R2, 9,0xe7d3fbc8,20);
	ROUND2(R2,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R2,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R2,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R2,13,0x455a14ed,20);
	ROUND2(R2,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R2,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R2,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R2, 0,0x8d2a4c8a,20);

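	// Round 3 starts with message word x[5]; R9 is seeded with c (R4).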
	MOVWBR	(5*4)(R6), R8
	MOVWZ	R4, R9

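// ROUND3 performs one step of round 3 using H(b,c,d) = b XOR c XOR d.
// R9 holds c on entry and is left holding b, which is the next step's c.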
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLL	$shift, a; \
	MOVWZ	b, R9; \
	ADD	b, a

	ROUND3(R2,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R2,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R2,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R2, 1,0xfde5380c,23);
	ROUND3(R2,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R2,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R2,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R2,13,0xbebfbc70,23);
	ROUND3(R2,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R2,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R2,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R2, 9, 0x4881d05,23);
	ROUND3(R2,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R2,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R2,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R2, 0,0xc4ac5665,23);

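	// Round 4 starts with message word x[0]; R9 is seeded with NOT d.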
	MOVWBR	(0*4)(R6), R8
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9

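// ROUND4 performs one step of round 4 using
//	I(b,c,d) = c XOR (b OR NOT d).
// R9 holds NOT d on entry; the macro rebuilds R9 as NOT c, which is
// NOT d for the next step.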
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	MOVWZ	$0xffffffff, R9; \
	RLL	$shift,	a; \
	XOR	c, R9; \
	ADD	b, a

	ROUND4(R2,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R2,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R2,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R2,12,0xfc93a039,21);
	ROUND4(R2,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R2,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R2,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R2, 8,0x85845dd1,21);
	ROUND4(R2,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R2,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R2,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R2, 4,0x4e0811a1,21);
	ROUND4(R2,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R2,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R2,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R2, 0,0xeb86d391,21);

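	// Add the state saved at the top of the loop back into the new
	// state, one 32-bit word at a time.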
	MOVWZ	tmp-16(SP), R1
	ADD	R1, R2
	MOVWZ	tmp-12(SP), R1
	ADD	R1, R3
	MOVWZ	tmp-8(SP), R1
	ADD	R1, R4
	MOVWZ	tmp-4(SP), R1
	ADD	R1, R5

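	// Advance to the next 64-byte block and loop while input remains.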
	LA	64(R6), R6
	CMPBLT	R6, R7, loop

end:
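	// R1 was clobbered as scratch above, so reload the digest pointer
	// before storing the updated state.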
	MOVD	dig+0(FP), R1
	STMY	R2, R5, 0(R1)
	RET