1221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/* Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
2221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
3221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromPermission is hereby granted, free of charge, to any person obtaining
4221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstroma copy of this software and associated documentation files (the
5221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom"Software"), to deal in the Software without restriction, including
6221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromwithout limitation the rights to use, copy, modify, merge, publish,
7221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromdistribute, sublicense, and/or sell copies of the Software, and to
8221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrompermit persons to whom the Software is furnished to do so, subject to
9221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromthe following conditions:
10221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
11221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromThe above copyright notice and this permission notice shall be
12221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromincluded in all copies or substantial portions of the Software.
13221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
14221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
21221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
22221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	Common registers are assigned as follows:
23221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
24221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	COMMON
25221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
26221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t0		Const Tbl Ptr	TPtr
27221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t1		Round Constant	TRound
28221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t4		Block residual	LenResid
29221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t5		Residual Data	DTmp
30221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
31221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}0	Block 0 Cycle	RotateM0
32221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}1	Block Value 12	M12
33221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}2	Block Value 8	M8
34221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}3	Block Value 4	M4
35221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}4	Block Value 0	M0
36221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}5	Block 1 Cycle	RotateM1
37221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}6	Block Value 13	M13
38221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}7	Block Value 9	M9
39221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}8	Block Value 5	M5
40221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}9	Block Value 1	M1
41221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}10	Block 2 Cycle	RotateM2
42221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}11	Block Value 14	M14
43221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}12	Block Value 10	M10
44221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}13	Block Value 6	M6
45221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}14	Block Value 2	M2
46221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}15	Block 3 Cycle	RotateM3
47221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}16	Block Value 15	M15
48221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}17	Block Value 11	M11
49221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}18	Block Value 7	M7
50221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}19	Block Value 3	M3
51221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}20	Scratch			Z
52221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}21	Scratch			Y
53221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}22	Scratch			X
54221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}23	Scratch			W
55221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}24	Digest A		A
56221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}25	Digest B		B
57221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}26	Digest C		C
58221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}27	Digest D		D
59221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	{in,out}28	Active Data Ptr	DPtr
60221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	in28		Dummy Value		-
61221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	out28		Dummy Value		-
62221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	bt0			Coroutine Link	QUICK_RTN
63221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
64221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom///	These predicates are used for computing the padding block(s) and
65221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom///	are shared between the driver and digest co-routines
66221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
67221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt0			Extra Pad Block	pExtra
68221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt1			Load next word	pLoad
69221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt2			Skip next word	pSkip
70221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt3			Search for Pad	pNoPad
71221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt4			Pad Word 0		pPad0
72221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt5			Pad Word 1		pPad1
73221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt6			Pad Word 2		pPad2
74221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt7			Pad Word 3		pPad3
75221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	Registers and predicates shared by the driver and the digest
//	helpers.  Each name is the third column of the COMMON table above:
//	t0 -> r14, t1 -> r15, t4 -> r18, t5 -> r19, bt0 -> b6 and
//	pt0..pt7 -> p6..p13.
76221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	DTmp		r19
77221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	LenResid	r18
78221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	QUICK_RTN	b6
79221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	TPtr		r14
80221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	TRound		r15
81221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pExtra		p6
82221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pLoad		p7
83221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pNoPad		p9
84221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pPad0		p10
85221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pPad1		p11
86221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pPad2		p12
87221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pPad3		p13
88221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pSkip		p8
89221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	Output-register (outN) names.  A trailing underscore marks the outN
//	register whose number matches the inN alias of the same name without
//	the underscore (A_ is out24 and A is in24, and so on).  The driver
//	code in this file uses these names to fill in and read back the
//	values it exchanges with the digest helper.
90221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	A_		out24
91221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	B_		out25
92221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	C_		out26
93221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	D_		out27
94221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	DPtr_		out28
95221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M0_		out4
96221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M1_		out9
97221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M10_		out12
98221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M11_		out17
99221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M12_		out1
100221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M13_		out6
101221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M14_		out11
102221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M15_		out16
103221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M2_		out14
104221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M3_		out19
105221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M4_		out3
106221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M5_		out8
107221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M6_		out13
108221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M7_		out18
109221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M8_		out2
110221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M9_		out7
111221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM0_	out0
112221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM1_	out5
113221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM2_	out10
114221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM3_	out15
115221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	W_		out23
116221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	X_		out22
117221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	Y_		out21
118221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	Z_		out20
119221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	Input-register (inN) names, one per {in,out}N row of the COMMON table
//	above.  None of these names is referenced in the driver code shown in
//	this part of the file; presumably the digest helpers use them -- confirm.
120221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	A		in24
121221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	B		in25
122221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	C		in26
123221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	D		in27
124221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	DPtr		in28
125221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M0		in4
126221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M1		in9
127221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M10		in12
128221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M11		in17
129221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M12		in1
130221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M13		in6
131221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M14		in11
132221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M15		in16
133221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M2		in14
134221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M3		in19
135221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M4		in3
136221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M5		in8
137221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M6		in13
138221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M7		in18
139221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M8		in2
140221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	M9		in7
141221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM0	in0
142221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM1	in5
143221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM2	in10
144221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	RotateM3	in15
145221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	W		in23
146221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	X		in22
147221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	Y		in21
148221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	Z		in20
149221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	The four MD5_* values are the operands of the alloc at the top of
//	md5_block_asm_data_order: 3 inputs, no locals, 29 outputs and no
//	rotating registers.
150221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/* register stack configuration for md5_block_asm_data_order(): */
151221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	MD5_NINP	3
152221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	MD5_NLOC	0
153221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define MD5_NOUT	29
154221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define MD5_NROT	0
155221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	_NINPUTS is defined as MD5_NOUT, so a helper frame has as many inputs
//	as the driver has outputs.  These macros are not referenced in the
//	driver code shown in this part of the file; presumably the helpers
//	pass them to their own alloc -- confirm.
156221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/* register stack configuration for helpers: */
157221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	_NINPUTS	MD5_NOUT
158221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	_NLOCALS	0
159221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define _NOUTPUT	0
160221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	_NROTATE	24	/* this must be <= _NINPUTS */
161221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	ADDP is the add applied to incoming pointers: addp4 when building for
//	HP-UX without _LP64, a plain add everywhere else.
162221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(_HPUX_SOURCE) && !defined(_LP64)
163221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	ADDP	addp4
164221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
165221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	ADDP	add
166221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
167221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	HOST_IS_BIG_ENDIAN is set for HP-UX or when B_ENDIAN is defined.  When
//	it is set, the driver clears psr.be (rum) before processing data and
//	sets it again (sum) at .md5_exit.
168221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
169221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define HOST_IS_BIG_ENDIAN
170221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
171221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
172221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	Macros for getting the left and right portions of little-endian words
//
//	GETLW(dst, src, align): dep.z -- dst = the low (8 * align) bits of
//		src placed at bit position (32 - 8 * align), every other bit
//		of dst zero.
//	GETRW(dst, src, align): extr.u -- dst = the (32 - 8 * align) bits of
//		src that start at bit (8 * align), zero-extended.
//
//	OR-ing a GETLW result with the GETRW result of the previous word
//	therefore yields one 32-bit word that straddles two aligned words
//	(this is how MD5UNALIGNED builds M12_).  The align argument is pasted
//	without parentheses, so pass a plain constant.
173221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
174221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	GETLW(dst, src, align)	dep.z dst = src, 32 - 8 * align, 8 * align
175221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	GETRW(dst, src, align)	extr.u dst = src, 8 * align, 32 - 8 * align
176221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
177221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	MD5 driver
178221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
179221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//		Reads an input block, then calls the digest block
180221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//		subroutine and adds the results to the accumulated
181221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//		digest.  It allocates MD5_NOUT (29) outs which the
182221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//		subroutine uses as its inputs and rotating
183221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//		registers. Initializes the round constant pointer and
184221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//		takes care of saving/restoring ar.lc
185221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
186221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom///	INPUT
187221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
188221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	in0		Context Ptr		CtxPtr0
189221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	in1		Input Data Ptr		DPtrIn
190221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	in2		Integral Blocks		BlockCount
191221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	rp		Return Address		-
192221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
193221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom///	CODE
194221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
195221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	v2		Input Align		InAlign
196221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t0		Shared w/digest		-
197221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t1		Shared w/digest		-
198221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t2		Shared w/digest		-
199221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t3		Shared w/digest		-
200221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t4		Shared w/digest		-
201221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t5		Shared w/digest		-
202221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t6		PFS Save		PFSSave
203221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t7		ar.lc Save		LCSave
204221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t8		Saved PR		PRSave
205221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t9		2nd CtxPtr		CtxPtr1
206221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t10		Table Base		CTable
207221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t11		Table[0]		CTable0
208221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t13		Accumulator A		AccumA
209221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t14		Accumulator B		AccumB
210221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t15		Accumulator C		AccumC
211221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	t16		Accumulator D		AccumD
212221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt0		Shared w/digest		-
213221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt1		Shared w/digest		-
214221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt2		Shared w/digest		-
215221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt3		Shared w/digest		-
216221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt4		Shared w/digest		-
217221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt5		Shared w/digest		-
218221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt6		Shared w/digest		-
219221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt7		Shared w/digest		-
220221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt8		Not Aligned		pOff
221221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	pt8		Blocks Left		pAgain
222221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	Driver-only registers, named as in the third column of the CODE table
//	above.  pOff and pAgain share p63 (both are listed as pt8): pOff is
//	only tested by the alignment branch ahead of the block loops, pAgain
//	only by the loop-back branches.
223221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	AccumA		r27
224221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	AccumB		r28
225221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	AccumC		r29
226221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	AccumD		r30
227221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	CTable		r24
228221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	CTable0		r25
229221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	CtxPtr0		in0
230221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	CtxPtr1		r23
231221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	DPtrIn		in1
232221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	BlockCount	in2
233221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	InAlign		r10
234221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	LCSave		r21
235221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	PFSSave		r20
236221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	PRSave		r22
237221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pAgain		p63
238221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	pOff		p63
239221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
240221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.text
241221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
242221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/* md5_block_asm_data_order(MD5_CTX *c, const void *data, size_t num)
243221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
244221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom     where:
245221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom      c: a pointer to a structure of this type:
246221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
247221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	   typedef struct MD5state_st
248221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	     {
249221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	       MD5_LONG A,B,C,D;
250221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	       MD5_LONG Nl,Nh;
251221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	       MD5_LONG data[MD5_LBLOCK];
252221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	       unsigned int num;
253221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	     }
254221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	   MD5_CTX;
255221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
256221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom      data: a pointer to the input data (may be misaligned)
257221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom      num:  the number of 64-byte blocks to hash (i.e., the length
258221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom            of DATA is 64*NUM).
259221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
260221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom   */
261221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	md5_block_asm_data_order: driver entry, the loop used when the data
//	pointer is 4-byte aligned, and the common exit path.
//
//	in0 = context pointer, in1 = data pointer, in2 = block count.  The
//	four digest words are loaded into AccumA..AccumD, every block is
//	handed to md5_digest_block0 through the output registers, and the
//	accumulated digest is stored back to the context at .md5_exit.
//
//	NOTE(review): BlockCount is decremented before it is first tested, so
//	a count of 0 is not treated as an empty request -- presumably callers
//	never pass 0; confirm.
262221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.type	md5_block_asm_data_order, @function
263221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.global	md5_block_asm_data_order
264221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.align	32
265221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.proc	md5_block_asm_data_order
266221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommd5_block_asm_data_order:
267221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_block:
268221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.prologue
//	Allocate the register frame, point CtxPtr1 eight bytes into the
//	context (the third digest word) and capture ip.  This bundle is the
//	one labelled .md5_block, so CTable starts out as that address.
269221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
270221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.save	ar.pfs, PFSSave
271221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	alloc	PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
272221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ADDP	CtxPtr1 = 8, CtxPtr0
273221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	CTable = ip
274221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
//	Pass both incoming pointers through ADDP (addp4 on HP-UX without
//	_LP64, plain add otherwise) and save ar.lc.
275221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
276221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ADDP	DPtrIn = 0, DPtrIn
277221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ADDP	CtxPtr0 = 0, CtxPtr0
278221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.save	ar.lc, LCSave
279221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	LCSave = ar.lc
280221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
281221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom;;
//	CTable = address of .md5_tbl_data_order (label defined outside this
//	part of the file), formed as an offset from .md5_block.  InAlign =
//	low two bits of the data pointer.
282221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
283221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	CTable = .md5_tbl_data_order#-.md5_block#, CTable
284221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	InAlign = 0x3, DPtrIn
285221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
286221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	Load digest words A and C, post-incrementing both context pointers
//	by 4, and save the predicate registers.
287221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
288221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	AccumA = [CtxPtr0], 4
289221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	AccumC = [CtxPtr1], 4
290221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.save pr, PRSave
291221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	PRSave = pr
292221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.body
293221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
294221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom;;
//	Load digest words B and D without post-increment, so CtxPtr0 and
//	CtxPtr1 keep pointing at B and D for the stores at .md5_exit.
//	DPtr_ = data pointer with its low two bits cleared.
295221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
296221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	AccumB = [CtxPtr0]
297221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	AccumD = [CtxPtr1]
298221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	dep	DPtr_ = 0, DPtrIn, 0, 2
299221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
300221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#ifdef HOST_IS_BIG_ENDIAN
301221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rum	psr.be;;	// switch to little-endian
302221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
//	CTable0 = first word of the table; CTable is post-incremented to the
//	second word.  Any non-zero InAlign leaves for .md5_unaligned.
303221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmb
304221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	CTable0 = [CTable], 4
305221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.ne	pOff, p0 = 0, InAlign
306221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pOff)	br.cond.spnt.many .md5_unaligned
307221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
308221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
309221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	The FF load/compute loop rotates values three times, so that
310221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	loading into M12 here produces the M0 value, M13 -> M1, etc.
311221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	Each pass loads four 32-bit words into M12_..M15_, reloads TPtr and
//	TRound from CTable and CTable0, copies the accumulators into A_..D_,
//	decrements BlockCount and calls md5_digest_block0 with the return
//	link in QUICK_RTN.
312221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_block_loop0:
313221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
314221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	M12_ = [DPtr_], 4
315221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	TPtr = CTable
316221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	TRound = CTable0
317221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
318221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
319221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	M13_ = [DPtr_], 4
320221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	A_ = AccumA
321221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	B_ = AccumB
322221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
323221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
324221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	M14_ = [DPtr_], 4
325221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	C_ = AccumC
326221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	D_ = AccumD
327221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
328221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmb
329221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	M15_ = [DPtr_], 4
330221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	BlockCount = -1, BlockCount
331221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.call.sptk.many QUICK_RTN = md5_digest_block0
332221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
333221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
334221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	Now, we add the new digest values and do some clean-up
335221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	before checking if there's another full block to process
336221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
337221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
338221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumA = AccumA, A_
339221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumB = AccumB, B_
340221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.ne	pAgain, p0 = 0, BlockCount
341221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
342221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib
343221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumC = AccumC, C_
344221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumD = AccumD, D_
345221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pAgain) br.cond.dptk.many .md5_block_loop0
346221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
347221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	Common exit, also the branch target at the end of MD5UNALIGNED.
//	CtxPtr0 and CtxPtr1 point at B and D here, so those are stored first
//	with a -4 post-increment, then A and C.  pr (mask 0x1ffff), ar.lc and
//	ar.pfs are restored from the values saved in the prologue.
348221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_exit:
349221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#ifdef HOST_IS_BIG_ENDIAN
350221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	sum	psr.be;;	// switch back to big-endian mode
351221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
352221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
353221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st4	[CtxPtr0] = AccumB, -4
354221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st4	[CtxPtr1] = AccumD, -4
355221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	pr = PRSave, 0x1ffff ;;
356221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
357221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
358221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st4	[CtxPtr0] = AccumA
359221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	st4	[CtxPtr1] = AccumC
360221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	ar.lc = LCSave
361221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
362221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib
363221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	ar.pfs = PFSSave
364221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.ret.sptk.few	rp
365221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
366221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
//	MD5UNALIGNED(offset): expands to the driver loop for one particular
//	misalignment of the data pointer.  It defines the labels
//	.md5_process<offset> and .md5_block_loop<offset>.
//
//	  .md5_process<offset>:   DTmp = GETRW(DTmp, DTmp, offset).  DTmp is
//		expected to already hold an aligned input word on entry; it is
//		loaded by code outside this macro -- confirm at the call site.
//	  .md5_block_loop<offset>: load the next aligned word into Y_, build
//		M12_ = GETLW(Y_) | DTmp, then keep GETRW(Y_) in DTmp for the
//		next pass.  M13_..M15_ are loaded as-is; presumably
//		md5_digest_block<offset> realigns them -- confirm.  The rest
//		matches the aligned loop: reload TPtr/TRound, copy the
//		accumulators into A_..D_, decrement BlockCount, call
//		md5_digest_block<offset>, add A_..D_ back into the
//		accumulators and loop while BlockCount is non-zero.
//	  When the count reaches zero, branch to .md5_exit.
//
//	The body is one backslash-continued definition, so no // comments are
//	placed inside it.
367221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define	MD5UNALIGNED(offset)						\
368221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_process##offset:							\
369221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;								\
370221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop	0x0	;						\
371221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	GETRW(DTmp, DTmp, offset) ;					\
372221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
373221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_block_loop##offset:						\
374221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;								\
375221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	Y_ = [DPtr_], 4 ;					\
376221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	TPtr = CTable ;						\
377221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	TRound = CTable0 ;					\
378221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
379221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;								\
380221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	M13_ = [DPtr_], 4 ;					\
381221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	A_ = AccumA ;						\
382221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	B_ = AccumB ;						\
383221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
384221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;								\
385221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	M14_ = [DPtr_], 4 ;					\
386221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	GETLW(W_, Y_, offset) ;						\
387221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	C_ = AccumC ;						\
388221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}									\
389221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;								\
390221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov	D_ = AccumD ;;						\
391221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or	M12_ = W_, DTmp ;					\
392221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	GETRW(DTmp, Y_, offset) ;					\
393221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}									\
394221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;								\
395221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	M15_ = [DPtr_], 4 ;					\
396221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	BlockCount = -1, BlockCount ;				\
397221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.call.sptk.many QUICK_RTN = md5_digest_block##offset;		\
398221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
399221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;								\
400221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumA = AccumA, A_ ;					\
401221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumB = AccumB, B_ ;					\
402221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.ne	pAgain, p0 = 0, BlockCount ;				\
403221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}									\
404221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;								\
405221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumC = AccumC, C_ ;					\
406221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	AccumD = AccumD, D_ ;					\
407221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pAgain) br.cond.dptk.many .md5_block_loop##offset ;			\
408221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
409221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;								\
410221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop	0x0 ;							\
411221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop	0x0 ;							\
412221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.cond.sptk.many .md5_exit ;					\
413221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
414221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
415221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.align	32
416221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_unaligned:
417221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
418221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	Because variable shifts are expensive, we special case each of
419221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	the four alignments. In practice, this won't hurt too much
420221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//	since only one working set of code will be loaded.
421221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
//	The first bundle loads a 32-bit word from [DPtr_] into DTmp
//	(post-incrementing DPtr_ by 4) and compares InAlign with 1; the
//	second bundle compares InAlign with 2. A match branches to
//	.md5_process1 or .md5_process2 respectively; any other value
//	falls through into the MD5UNALIGNED(3) expansion that follows.
//
422221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib
423221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4	DTmp = [DPtr_], 4
424221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.eq	pOff, p0 = 1, InAlign
425221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pOff)	br.cond.dpnt.many .md5_process1
426221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
427221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib
428221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.eq	pOff, p0 = 2, InAlign
429221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop	0x0
430221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pOff)	br.cond.dpnt.many .md5_process2
431221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
432221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	MD5UNALIGNED(3)
433221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	MD5UNALIGNED(1)
434221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	MD5UNALIGNED(2)
435221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
436221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.endp md5_block_asm_data_order
437221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
438221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
439221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// MD5 Perform the F function and load
440221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
441221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// Passed the first 4 words (M0 - M3) and initial (A, B, C, D) values,
442221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// computes the FF() round of functions, then branches to the common
443221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// digest code to finish up with GG(), HH, and II().
444221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
445221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// INPUT
446221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
447221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// rp Return Address -
448221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
449221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// CODE
450221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
451221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// v0 PFS bit bucket PFS
452221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// v1 Loop Trip Count LTrip
453221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// pt0 Load next word pMore
454221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
455221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/* For F round: */
//	LTrip is the counter tested and decremented by FFLOOP, PFS receives
//	the value written by alloc, and pMore predicates the message-word
//	loads in FFLOAD/FFLOOP.
456221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define LTrip	r9
457221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define PFS	r8
458221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define pMore	p6
459221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
460221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/* For GHI rounds: */
//	Temporaries used by the register shuffles between rounds.
//	Note that T names the same register (r9) as LTrip.
461221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define T	r9
462221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define U	r10
463221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define V	r11
464221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// COMPUTE(a, b, s, M, R)
//
// Second half of a round step; expects the step's sum to be in Z already.
//   1. Loads the next 32-bit entry from [TPtr] into TRound and advances
//      TPtr by 4.
//   2. dep.z copies the low 32 bits of Z into the upper half of Y, and
//      shrp then shifts the Z:Y pair right by (64 - s), so the low 32
//      bits of Z end up holding the low word rotated left by s bits.
//   3. Stores Z + b in a and copies M into R.
//
//	a	state register that receives the result
//	b	state register added to the rotated sum
//	s	left-rotate amount, in bits
//	M, R	source and destination of the trailing register copy
//
// Clobbers Y, Z, TRound and TPtr.
465221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define COMPUTE(a, b, s, M, R)			\
466221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{						\
467221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.mii ;					\
468221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4 TRound = [TPtr], 4 ;		\
469221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	dep.z Y = Z, 32, 32 ;;			\
470221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	shrp Z = Z, Y, 64 - s ;			\
471221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
472221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{						\
473221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.mmi ;					\
474221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add a = Z, b ;				\
475221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov R = M ;				\
476221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0 ;				\
477221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
478221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// LOOP(a, b, s, M, R, label)
//
// Same instruction sequence as COMPUTE (load the next TRound, rotate the
// low word of Z left by s, a = Z + b, R = M), except that the second
// bundle uses the .mib template and its last slot is
// "br.ctop.sptk.many label" instead of a nop, closing a counted loop
// that is controlled by ar.lc / ar.ec.
//
//	label	loop head that br.ctop branches back to
479221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define LOOP(a, b, s, M, R, label)		\
480221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
481221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4 TRound = [TPtr], 4 ;		\
482221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	dep.z Y = Z, 32, 32 ;;			\
483221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	shrp Z = Z, Y, 64 - s ;			\
484221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
485221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;					\
486221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add a = Z, b ;				\
487221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov R = M ;				\
488221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.ctop.sptk.many label ;		\
489221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
490221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
491221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// G(B, C, D) = (B & D) | (C & ~D)
//
// The G(a, b, c, d, M) macro leaves
//
//	Z = a + M + TRound + ((b & d) | (c & ~d))
//
// ready for the rotate-and-add in COMPUTE/LOOP. X and Y are scratch;
// andcm supplies the "c & ~d" term in a single instruction.
492221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
493221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define G(a, b, c, d, M)			\
494221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;					\
495221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = M, TRound ;			\
496221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and Y = b, d ;				\
497221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andcm X = c, d ;			\
498221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
499221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
500221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, a ;				\
501221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or Y = Y, X ;;				\
502221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, Y ;				\
503221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
504221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
505221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// H(B, C, D) = B ^ C ^ D
//
// The H(a, b, c, d, M) macro leaves
//
//	Z = a + M + TRound + (b ^ c ^ d)
//
// ready for the rotate-and-add in COMPUTE/LOOP. Y is scratch.
506221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
507221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define H(a, b, c, d, M)			\
508221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;					\
509221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = M, TRound ;			\
510221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor Y = b, c ;				\
511221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0 ;				\
512221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
513221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
514221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, a ;				\
515221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor Y = Y, d ;;				\
516221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, Y ;				\
517221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
518221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
519221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// I(B, C, D) = C ^ (B | ~D)
520221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
521221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// However, since we have an andcm operator, we use the fact that
522221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
523221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// Y ^ Z == ~Y ^ ~Z
524221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
525221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// to rewrite the expression as
526221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
527221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// I(B, C, D) = ~C ^ (~B & D)
//
// The I(a, b, c, d, M) macro leaves
//
//	Z = a + M + TRound + (~c ^ (d & ~b))
//
// ready for the rotate-and-add in COMPUTE/LOOP. "andcm Y = d, b" yields
// d & ~b and "andcm X = -1, c" yields ~c; X and Y are scratch.
528221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
529221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define I(a, b, c, d, M)			\
530221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;					\
531221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = M, TRound ;			\
532221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andcm Y = d, b ;			\
533221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andcm X = -1, c ;			\
534221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
535221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
536221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, a ;				\
537221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor Y = Y, X ;;				\
538221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, Y ;				\
539221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
540221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// GG4(label)
//
// Loop body for the G round: four G steps on M0..M3 with left-rotate
// amounts 5, 9, 14 and 20. The roles of A, B, C, D rotate one position
// per step (A,B,C,D -> D,A,B,C -> C,D,A,B -> B,C,D,A). Each step copies
// the word it consumed into the matching RotateMn register, and the
// last step uses LOOP so that br.ctop can branch back to label.
541221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define GG4(label)				\
542221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	G(A, B, C, D, M0)			\
543221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(A, B, 5, M0, RotateM0)		\
544221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	G(D, A, B, C, M1)			\
545221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(D, A, 9, M1, RotateM1)		\
546221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	G(C, D, A, B, M2)			\
547221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(C, D, 14, M2, RotateM2)		\
548221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	G(B, C, D, A, M3)			\
549221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	LOOP(B, C, 20, M3, RotateM3, label)
550221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// HH4(label)
//
// Loop body for the H round: four H steps on M0..M3 with left-rotate
// amounts 4, 11, 16 and 23. The roles of A, B, C, D rotate one position
// per step. Each step copies the word it consumed into the matching
// RotateMn register, and the last step uses LOOP so that br.ctop can
// branch back to label.
551221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define HH4(label)				\
552221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	H(A, B, C, D, M0)			\
553221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(A, B, 4, M0, RotateM0)		\
554221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	H(D, A, B, C, M1)			\
555221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(D, A, 11, M1, RotateM1)		\
556221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	H(C, D, A, B, M2)			\
557221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(C, D, 16, M2, RotateM2)		\
558221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	H(B, C, D, A, M3)			\
559221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	LOOP(B, C, 23, M3, RotateM3, label)
560221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// II4(label)
//
// Loop body for the I round: four I steps on M0..M3 with left-rotate
// amounts 6, 10, 15 and 21. The roles of A, B, C, D rotate one position
// per step. Each step copies the word it consumed into the matching
// RotateMn register, and the last step uses LOOP so that br.ctop can
// branch back to label.
561221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define II4(label)				\
562221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	I(A, B, C, D, M0)			\
563221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(A, B, 6, M0, RotateM0)		\
564221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	I(D, A, B, C, M1)			\
565221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(D, A, 10, M1, RotateM1)		\
566221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	I(C, D, A, B, M2)			\
567221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	COMPUTE(C, D, 15, M2, RotateM2)		\
568221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	I(B, C, D, A, M3)			\
569221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	LOOP(B, C, 21, M3, RotateM3, label)
570221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// FFLOAD(a, b, c, d, M, N, s)
//
// One complete F-round step, combined with an optional message load:
//   - when pMore is set, loads a 32-bit word from [DPtr] into N and
//     advances DPtr by 4;
//   - forms Z = a + M + TRound + ((b & c) | (d & ~b));
//   - loads the next 32-bit entry from [TPtr] into TRound and advances
//     TPtr by 4;
//   - uses dep.z/shrp to rotate the low word of Z left by s bits and
//     stores Z + b in a.
//
//	a, b, c, d	state registers for this step (a is updated)
//	M		message word consumed by this step
//	N		register that receives the newly loaded word
//	s		left-rotate amount, in bits
//
// Clobbers X, Y, Z, TRound and TPtr (and DPtr when pMore is set).
571221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define FFLOAD(a, b, c, d, M, N, s)		\
572221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
573221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore) ld4 N = [DPtr], 4 ;			\
574221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = M, TRound ;			\
575221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and Y = c, b ;				\
576221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}						\
577221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;					\
578221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andcm X = d, b ;;			\
579221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, a ;				\
580221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or Y = Y, X ;				\
581221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
582221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
583221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4 TRound = [TPtr], 4 ;		\
584221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, Y ;;				\
585221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	dep.z Y = Z, 32, 32 ;			\
586221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
587221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
588221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0 ;				\
589221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	shrp Z = Z, Y, 64 - s ;;		\
590221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add a = Z, b ;				\
591221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
592221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// FFLOOP(a, b, c, d, M, N, s, dest)
//
// Same F-round step as FFLOAD (optional load of N under pMore,
// Z = a + M + TRound + ((b & c) | (d & ~b)), next TRound load, rotate
// left by s, a = Z + b), followed by a loop-control bundle that:
//   - sets pMore to (LTrip != 0); the compare sits in the same
//     instruction group as the decrement, so it tests the value LTrip
//     had before this bundle;
//   - decrements LTrip by 1;
//   - executes "br.ctop.dptk.many dest" to close the counted loop
//     controlled by ar.lc / ar.ec.
//
//	dest	loop head that br.ctop branches back to
593221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define FFLOOP(a, b, c, d, M, N, s, dest)	\
594221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
595221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore)	ld4 N = [DPtr], 4 ;			\
596221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = M, TRound ;			\
597221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and Y = c, b ;				\
598221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}						\
599221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;					\
600221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andcm X = d, b ;;			\
601221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, a ;				\
602221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or Y = Y, X ;				\
603221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
604221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
605221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4 TRound = [TPtr], 4 ;		\
606221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, Y ;;				\
607221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	dep.z Y = Z, 32, 32 ;			\
608221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
609221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
610221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0 ;				\
611221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	shrp Z = Z, Y, 64 - s ;;		\
612221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add a = Z, b ;				\
613221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}						\
614221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;					\
615221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.ne pMore, p0 = 0, LTrip ;		\
616221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add LTrip = -1, LTrip ;			\
617221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.ctop.dptk.many dest ;		\
618221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
619221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// md5_digest_block0
//
// Runs the FF round and then falls through into md5_digest_GHI.
//
// Setup: alloc writes the previous ar.pfs into PFS, LTrip = 2,
// ar.lc = 3, ar.ec = 0, and pMore is forced true (r0 == r0).
//
// Loop: .md5_FF_round0 performs four F steps per trip (rotate amounts
// 7, 12, 17, 22) on the registers currently named M12..M15, loading new
// words into RotateM0..RotateM3 while pMore is set. With ar.lc = 3 and
// ar.ec = 0 the br.ctop in FFLOOP branches back three times, giving
// four trips. pMore is recomputed at the end of each trip from LTrip
// (2, 1, 0), so the predicated loads execute on the first three trips
// only.
620221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.type md5_digest_block0, @function
621221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.align 32
622221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
623221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.proc md5_digest_block0
624221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.prologue
625221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommd5_digest_block0:
626221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.altrp QUICK_RTN
627221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.body
628221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
629221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
630221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov LTrip = 2
631221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.lc = 3
632221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
633221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii
634221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.eq pMore, p0 = r0, r0
635221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.ec = 0
636221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0
637221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
638221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
639221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_FF_round0:
640221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOAD(A, B, C, D, M12, RotateM0, 7)
641221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOAD(D, A, B, C, M13, RotateM1, 12)
642221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOAD(C, D, A, B, M14, RotateM2, 17)
643221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOOP(B, C, D, A, M15, RotateM3, 22, .md5_FF_round0)
644221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	//
645221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	// !!! Fall through to md5_digest_GHI
646221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	//
647221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.endp md5_digest_block0
648221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// md5_digest_GHI
//
// Runs the GG, HH and II rounds and returns through QUICK_RTN. Before
// each round the sixteen registers M0..M15 are permuted with plain
// moves so that the round's loop body (GG4/HH4/II4) can always consume
// M0..M3, and ar.lc / ar.ec are reloaded with 3 and 1 for that round's
// br.ctop loop. In the permutation tables below, the first row gives
// the contents of M0..M15 before the shuffle and the second row the
// contents after it, both expressed as original word indices.
649221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.type md5_digest_GHI, @function
650221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.align 32
651221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
652221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.proc md5_digest_GHI
653221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.prologue
654221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.regstk _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
655221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommd5_digest_GHI:
656221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.altrp QUICK_RTN
657221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.body
658221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
659221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// The following sequence shuffles the block constants round for the
660221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// next round:
661221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
662221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
663221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
664221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
// Z, Y, X, W, V and U hold the values that would otherwise be
// overwritten before they are moved to their new positions.
665221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
666221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov Z = M0
667221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov Y = M15
668221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.lc = 3
669221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
670221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
671221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov X = M2
672221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov W = M9
673221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov V = M4
674221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
675221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
676221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
677221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M0 = M1
678221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M15 = M12
679221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.ec = 1
680221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
681221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
682221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M2 = M11
683221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M9 = M14
684221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M4 = M5
685221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
686221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
687221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
688221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M1 = M6
689221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M12 = M13
690221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov U = M3
691221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
692221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
693221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M11 = M8
694221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M14 = M7
695221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M5 = M10
696221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
697221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
698221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
699221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M6 = Y
700221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M13 = X
701221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M3 = Z
702221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
703221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
704221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M8 = W
705221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M7 = V
706221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M10 = U
707221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
708221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// GG round: the GG4 body, repeated under control of ar.lc / ar.ec.
709221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_GG_round:
710221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	GG4(.md5_GG_round)
711221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
712221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// The following sequence shuffles the block constants round for the
713221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// next round:
714221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
715221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
716221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2
//
// M2 and M10 already hold the right words and are left untouched.
// T (r9) is used as an extra temporary here.
717221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
718221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
719221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov Z = M0
720221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov Y = M1
721221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.lc = 3
722221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
723221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
724221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov X = M3
725221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov W = M5
726221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov V = M6
727221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
728221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
729221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
730221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M0 = M4
731221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M1 = M11
732221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.ec = 1
733221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
734221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
735221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M3 = M9
736221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov U = M8
737221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov T = M13
738221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
739221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
740221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
741221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M4 = Z
742221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M11 = Y
743221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M5 = M7
744221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
745221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
746221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M6 = M14
747221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M8 = M12
748221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M13 = M15
749221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
750221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
751221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
752221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M7 = W
753221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M14 = V
754221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0
755221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
756221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
757221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M9 = X
758221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M12 = U
759221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M15 = T
760221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
761221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// HH round: the HH4 body, repeated under control of ar.lc / ar.ec.
762221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_HH_round:
763221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	HH4(.md5_HH_round)
764221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
765221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// The following sequence shuffles the block constants round for the
766221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// next round:
767221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom//
768221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2
769221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom// 0 7 14 5 12 3 10 1 8 15 6 13 4 11 2 9
770221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
771221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
772221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov Z = M0
773221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov Y = M15
774221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.lc = 3
775221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
776221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
777221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov X = M10
778221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov W = M1
779221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov V = M4
780221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
781221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
782221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
783221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M0 = M9
784221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M15 = M12
785221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.ec = 1
786221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
787221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
788221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M10 = M11
789221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M1 = M6
790221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M4 = M13
791221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
792221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
793221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
794221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M9 = M14
795221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M12 = M5
796221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov U = M3
797221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
798221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
799221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M11 = M8
800221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M6 = M7
801221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M13 = M2
802221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
803221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
804221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
805221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M14 = Y
806221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M5 = X
807221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M3 = Z
808221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
809221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi
810221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M8 = W
811221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M7 = V
812221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov M2 = U
813221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
814221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// II round: the II4 body, repeated under control of ar.lc / ar.ec.
815221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_II_round:
816221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	II4(.md5_II_round)
817221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// All rounds done: return through the QUICK_RTN branch register.
818221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib
819221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0
820221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0
821221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.ret.sptk.many QUICK_RTN
822221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
823221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
824221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.endp md5_digest_GHI
825221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * FFLOADU(a, b, c, d, M, P, N, s, offset)
 *
 * One round step of the form
 *
 *     a = b + rotl32(a + ((b & c) | (~b & d)) + M + TRound, s)
 *
 * interleaved with fetching the next input word and the next table value.
 *
 * Arithmetic, as visible in the bundles below:
 *   - Z accumulates M + TRound + a + Y, where Y = (c & b) | (d & ~b)
 *     (and / andcm / or).
 *   - dep.z copies the low 32 bits of Z into bits 32..63 of Y, then
 *     shrp shifts the Z:Y pair right by 64 - s; the low 32 bits of the
 *     result hold the low 32 bits of Z rotated left by s.
 *   - a = Z + b.
 *
 * Data movement:
 *   - (pMore) ld4 N = [DPtr], 4 : only when pMore is set, load 4 bytes
 *     into N and post-increment DPtr by 4.
 *   - ld4 TRound = [TPtr], 4    : unconditionally load the next 4-byte
 *     value into TRound and post-increment TPtr by 4.
 *   - W = GETLW(P) | DTmp, then DTmp = GETRW(P), then P = W.
 *     GETLW / GETRW are defined outside this section.
 *     NOTE(review): presumably they extract the two halves needed to
 *     assemble one word from input misaligned by `offset` bytes; confirm
 *     against their definitions.
 *     NOTE(review): GETRW and "mov P = W" share one instruction group,
 *     so GETRW presumably reads P before it is overwritten; confirm.
 *
 * Writes: a, P, N (when pMore), X, Y, Z, W, DTmp, TRound, DPtr (when
 * pMore), TPtr.
 */
826221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define FFLOADU(a, b, c, d, M, P, N, s, offset)	\
827221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
828221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore) ld4 N = [DPtr], 4 ;			\
829221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = M, TRound ;			\
830221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and Y = c, b ;				\
831221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}						\
832221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;					\
833221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andcm X = d, b ;;			\
834221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, a ;				\
835221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or Y = Y, X ;				\
836221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
837221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
838221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4 TRound = [TPtr], 4 ;		\
839221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	GETLW(W, P, offset) ;			\
840221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, Y ;				\
841221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
842221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
843221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or W = W, DTmp ;			\
844221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	dep.z Y = Z, 32, 32 ;;			\
845221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	shrp Z = Z, Y, 64 - s ;			\
846221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;						\
847221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;					\
848221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add a = Z, b ;				\
849221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	GETRW(DTmp, P, offset) ;		\
850221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov P = W ;				\
851221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
852221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * FFLOOPU(a, b, c, d, M, P, N, s, offset)
 *
 * Loop-closing round step.  The arithmetic is
 *
 *     a = b + rotl32(a + ((b & c) | (~b & d)) + M + TRound, s)
 *
 * computed with the same and / andcm / or / dep.z / shrp sequence as the
 * other steps of the loop body (the rotate result is valid in the low
 * 32 bits).
 *
 * Unlike a plain step, every instruction on the data-assembly path
 * (GETLW, "or W = W, DTmp", GETRW, "mov P = W") is predicated on pMore,
 * as is the ld4 from [DPtr].  The ld4 from [TPtr] into TRound is
 * unconditional.  GETLW / GETRW are defined outside this section.
 *
 * The final .mib bundle is the loop control:
 *   - cmp.ne sets pMore to (LTrip != 0).
 *   - add decrements LTrip by 1.
 *     NOTE(review): cmp and add sit in one instruction group with no
 *     stop between them, so the compare presumably sees LTrip before
 *     the decrement; confirm.
 *   - br.ctop branches back to .md5_FF_round<offset>, the label that the
 *     expanding code must define; the trip count comes from ar.lc, which
 *     this macro does not set.
 *
 * Writes: a, X, Y, Z, TRound, TPtr, pMore, LTrip; and, when pMore is
 * set on entry, N, DPtr, W, DTmp, P.
 */
853221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define FFLOOPU(a, b, c, d, M, P, N, s, offset)		\
854221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;						\
855221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore) ld4 N = [DPtr], 4 ;				\
856221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = M, TRound ;				\
857221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and Y = c, b ;					\
858221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}							\
859221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;						\
860221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andcm X = d, b ;;				\
861221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, a ;					\
862221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	or Y = Y, X ;					\
863221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;							\
864221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;						\
865221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	ld4 TRound = [TPtr], 4 ;			\
866221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore) GETLW(W, P, offset) 	;			\
867221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add Z = Z, Y ;					\
868221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;							\
869221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;						\
870221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore) or W = W, DTmp ;				\
871221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	dep.z Y = Z, 32, 32 ;;				\
872221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	shrp Z = Z, Y, 64 - s ;				\
873221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;							\
874221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;						\
875221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add a = Z, b ;					\
876221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore) GETRW(DTmp, P, offset) 	;			\
877221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom(pMore) mov P = W ;					\
878221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}							\
879221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;						\
880221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.ne pMore, p0 = 0, LTrip ;			\
881221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add LTrip = -1, LTrip ;				\
882221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.ctop.sptk.many .md5_FF_round##offset ;	\
883221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;
884221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * MD5FBLOCK(offset)
 *
 * Emits one procedure, md5_digest_block<offset>, 32-byte aligned, with
 * the return pointer declared to live in QUICK_RTN (.altrp).
 *
 * Setup:
 *   - alloc establishes the register frame from _NINPUTS, _NLOCALS,
 *     _NOUTPUT and _NROTATE (all defined outside this section).
 *   - LTrip = 2, ar.lc = 3, ar.ec = 0, and pMore is forced true by
 *     comparing r0 with itself.
 *
 * Loop body at .md5_FF_round<offset>: three FFLOADU steps and one
 * FFLOOPU step, with the A/B/C/D roles cycled each step and shift
 * amounts 7, 12, 17, 22.  The br.ctop lives inside FFLOOPU.
 *     NOTE(review): with ar.lc = 3 the branch should be taken three
 *     times, i.e. four passes and sixteen steps in total; and with
 *     LTrip starting at 2, pMore should stay set for the first three
 *     passes and be clear during the fourth.  Confirm against the
 *     br.ctop definition and the FFLOOPU control bundle.
 *     NOTE(review): the RotateM0..RotateM3 operands presumably name
 *     rotating registers that br.ctop renames each pass; confirm
 *     against their definitions.
 *
 * After the loop the code branches (br.cond, not a call) to
 * md5_digest_GHI, so this procedure has no return of its own here.
 *
 * The .pred.rel "mutex" annotation names pLoad and pSkip, neither of
 * which is referenced by the macros expanded in this body.
 */
885221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#define MD5FBLOCK(offset)						\
886221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.type md5_digest_block##offset, @function ;			\
887221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom									\
888221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.align 32 ;							\
889221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.proc md5_digest_block##offset ;				\
890221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.prologue ;							\
891221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.altrp QUICK_RTN ;						\
892221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.body ;								\
893221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommd5_digest_block##offset:						\
894221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mmi ;								\
895221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE ;	\
896221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov LTrip = 2 ;							\
897221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.lc = 3 ;							\
898221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
899221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mii ;								\
900221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	cmp.eq pMore, p0 = r0, r0 ;					\
901221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	mov ar.ec = 0 ;							\
902221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0 ;							\
903221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
904221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom									\
905221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.pred.rel "mutex", pLoad, pSkip ;				\
906221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_FF_round##offset:							\
907221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOADU(A, B, C, D, M12, M13, RotateM0, 7, offset)		\
908221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOADU(D, A, B, C, M13, M14, RotateM1, 12, offset)		\
909221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOADU(C, D, A, B, M14, M15, RotateM2, 17, offset)		\
910221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	FFLOOPU(B, C, D, A, M15, RotateM0, RotateM3, 22, offset)	\
911221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom									\
912221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom{	.mib ;								\
913221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0 ;							\
914221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	nop 0x0 ;							\
915221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	br.cond.sptk.many md5_digest_GHI ;				\
916221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom} ;;									\
9173d3a1b8fcf46ca3bdb3d8f09acd6ef604624a30dBrian Carlstrom	.endp md5_digest_block##offset
918221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * Expand MD5FBLOCK three times, producing md5_digest_block1,
 * md5_digest_block2 and md5_digest_block3, each with its own
 * .md5_FF_round<offset> loop label.
 * NOTE(review): the argument is presumably the byte misalignment of the
 * input pointer handled by that variant; it is only passed through to
 * GETLW / GETRW here, so confirm against their definitions.
 */
919221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromMD5FBLOCK(1)
920221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromMD5FBLOCK(2)
921221304ee937bc0910948a8be1320cb8cc4eb6d36Brian CarlstromMD5FBLOCK(3)
922221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
// md5_constants: table of 64 four-byte entries (256 bytes, matching the
// .size directive below), aligned to 64 bytes.
//
// Each row is one 32-bit value written out as four data1 bytes, least
// significant byte first, so the in-memory layout is little-endian
// regardless of how the assembler would order a multi-byte datum.
// Row 0, for example, is 78 a4 6a d7, i.e. the word 0xd76aa478.
// The trailing number on each row is the entry's index (0..63).
//
// The values are the per-step additive constants of MD5 (the T table
// of RFC 1321), in step order.
// NOTE(review): presumably consumed 4 bytes at a time through TPtr
// ("ld4 TRound = [TPtr], 4" in the round macros); the code that points
// TPtr at this table is outside this section -- confirm.
923221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.align 64
924221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	.type md5_constants, @object
925221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommd5_constants:
926221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.md5_tbl_data_order:			// To ensure little-endian data
927221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom					// order, code as bytes.
928221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x78, 0xa4, 0x6a, 0xd7	//     0
929221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x56, 0xb7, 0xc7, 0xe8	//     1
930221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xdb, 0x70, 0x20, 0x24	//     2
931221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xee, 0xce, 0xbd, 0xc1	//     3
932221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xaf, 0x0f, 0x7c, 0xf5	//     4
933221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x2a, 0xc6, 0x87, 0x47	//     5
934221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x13, 0x46, 0x30, 0xa8	//     6
935221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x01, 0x95, 0x46, 0xfd	//     7
936221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xd8, 0x98, 0x80, 0x69	//     8
937221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xaf, 0xf7, 0x44, 0x8b	//     9
938221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xb1, 0x5b, 0xff, 0xff	//    10
939221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xbe, 0xd7, 0x5c, 0x89	//    11
940221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x22, 0x11, 0x90, 0x6b	//    12
941221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x93, 0x71, 0x98, 0xfd	//    13
942221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x8e, 0x43, 0x79, 0xa6	//    14
943221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x21, 0x08, 0xb4, 0x49	//    15
944221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x62, 0x25, 0x1e, 0xf6	//    16
945221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x40, 0xb3, 0x40, 0xc0	//    17
946221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x51, 0x5a, 0x5e, 0x26	//    18
947221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xaa, 0xc7, 0xb6, 0xe9	//    19
948221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x5d, 0x10, 0x2f, 0xd6	//    20
949221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x53, 0x14, 0x44, 0x02	//    21
950221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x81, 0xe6, 0xa1, 0xd8	//    22
951221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xc8, 0xfb, 0xd3, 0xe7	//    23
952221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xe6, 0xcd, 0xe1, 0x21	//    24
953221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xd6, 0x07, 0x37, 0xc3	//    25
954221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x87, 0x0d, 0xd5, 0xf4	//    26
955221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xed, 0x14, 0x5a, 0x45	//    27
956221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x05, 0xe9, 0xe3, 0xa9	//    28
957221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xf8, 0xa3, 0xef, 0xfc	//    29
958221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xd9, 0x02, 0x6f, 0x67	//    30
959221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x8a, 0x4c, 0x2a, 0x8d	//    31
960221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x42, 0x39, 0xfa, 0xff	//    32
961221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x81, 0xf6, 0x71, 0x87	//    33
962221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x22, 0x61, 0x9d, 0x6d	//    34
963221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x0c, 0x38, 0xe5, 0xfd	//    35
964221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x44, 0xea, 0xbe, 0xa4	//    36
965221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xa9, 0xcf, 0xde, 0x4b	//    37
966221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x60, 0x4b, 0xbb, 0xf6	//    38
967221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x70, 0xbc, 0xbf, 0xbe	//    39
968221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xc6, 0x7e, 0x9b, 0x28	//    40
969221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xfa, 0x27, 0xa1, 0xea	//    41
970221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x85, 0x30, 0xef, 0xd4	//    42
971221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x05, 0x1d, 0x88, 0x04	//    43
972221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x39, 0xd0, 0xd4, 0xd9	//    44
973221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xe5, 0x99, 0xdb, 0xe6	//    45
974221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xf8, 0x7c, 0xa2, 0x1f	//    46
975221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x65, 0x56, 0xac, 0xc4	//    47
976221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x44, 0x22, 0x29, 0xf4	//    48
977221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x97, 0xff, 0x2a, 0x43	//    49
978221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xa7, 0x23, 0x94, 0xab	//    50
979221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x39, 0xa0, 0x93, 0xfc	//    51
980221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xc3, 0x59, 0x5b, 0x65	//    52
981221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x92, 0xcc, 0x0c, 0x8f	//    53
982221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x7d, 0xf4, 0xef, 0xff	//    54
983221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xd1, 0x5d, 0x84, 0x85	//    55
984221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x4f, 0x7e, 0xa8, 0x6f	//    56
985221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xe0, 0xe6, 0x2c, 0xfe	//    57
986221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x14, 0x43, 0x01, 0xa3	//    58
987221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xa1, 0x11, 0x08, 0x4e	//    59
988221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x82, 0x7e, 0x53, 0xf7	//    60
989221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x35, 0xf2, 0x3a, 0xbd	//    61
990221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0xbb, 0xd2, 0xd7, 0x2a	//    62
991221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	data1 0x91, 0xd3, 0x86, 0xeb	//    63
992221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom.size	md5_constants#,64*4
993