/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */


# Code goes in the text section; the entry point is aligned to 2^4 = 16 bytes.
        .text
        .p2align	4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# In:   GR8 = p, GR9 = ch, GR10 = count
# Out:  GR8 = p (GR8 is never written by this routine)
#
# Working registers:
#   GR4       - address tracking pointer
#   GR5       - number of bytes still to be written
#   GR6       - size of the chunk currently being subtracted from GR5
#   GR7       - index register for the store insns (0 whenever the store in
#               flight addresses @(GR4,GR0))
#   GR9       - fill byte, masked to 8 bits
#   GR12/GR13 - fill byte replicated across a register pair
#   ICC0/ICC1 - results of the alignment and "enough bytes left?" tests
#   ICC3      - Z is set once GR5 has been counted down to zero
#   CC5/CC7   - predicates for the conditionally executed insns
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#         (the fixup routine in this file, __memset_user_error_handler, takes
#         GR4+GR7 as the address of the store that faulted)
#
###############################################################################
        .globl		memset,__memset_end
        .type		memset,@function
memset:
	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9			; keep only the low byte of ch
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0				; count == 0: nothing to do

	# the alignment stores below all address @(GR4,GR0), so the index
	# register has to read as zero for the fault fixup's GR4+GR7 sum to be
	# the failing address (it used to hold whatever the caller left in it)
	setlos		#0,gr7

	# conditionally write a byte to 2-byte-align the address
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7			; CC7 = address is odd
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a halfword (2 bytes) to 4-byte-align the address
	# - only done if bit 1 of the address is set AND at least 2 bytes remain
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7			; CC7 = address bit 1 set
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5			; CC5 = no borrow from count - 2
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7			; CC7 = CC7 && CC5

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a word (4 bytes) to 8-byte-align the address
	# - only done if bit 2 of the address is set AND at least 4 bytes remain
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7			; CC7 = address bit 2 set
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5			; CC5 = no borrow from count - 4
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7			; CC7 = CC7 && CC5

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# from here on the stores use the update-index form, each one storing
	# at GR4+GR7 and leaving that address in GR4 - so GR4 is kept one step
	# behind the next byte to be written
	# - if an alignment store above was skipped for lack of bytes, fewer
	#   than 8 bytes remain and only the narrower remnant stores can fire
	# loop around writing 64-byte chunks (eight 8-byte stores per pass)
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7			; CC7 = at least 64 bytes remain
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0		; test for another full chunk
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b

	# now do 32-byte remnant
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7			; CC7 = at least 32 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6				; chunk size for the next stage
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0		; pre-test for the next stage
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	cknc		icc0,cc7			; CC7 = at least 16 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7			; CC7 = at least 8 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7				; index for the 4-byte stage
	beqlr		icc3,#0

	# now do 4-byte remnant
	# - GR4 trailed the next byte by 8; with the index now 4 it must trail by 4
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4
	cknc		icc0,cc7			; CC7 = at least 4 bytes remain
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1			; pre-test for the next stage
	beqlr		icc3,#0

	# now do 2-byte remnant
	# - index drops to 2, so GR4 must now trail the next byte by 2
	setlos		#2,gr7
	addi.p		gr4,#2,gr4
	cknc		icc1,cc7			; CC7 = at least 2 bytes remain
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0			; pre-test for the next stage
	beqlr		icc3,#0

	# now do 1-byte remnant
	# - plain store at GR4 itself, so the index is zeroed and GR4 is moved
	#   up onto the next byte
	setlos		#0,gr7
	addi.p		gr4,#2,gr4
	cknc		icc0,cc7			; CC7 = at least 1 byte remains
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr
__memset_end:

	.size		memset, __memset_end-memset

###############################################################################
#
# clear memory in userspace
# - return the number of bytes that could not be cleared (0 on complete success)
#
# long __memset_user(void *p, size_t count)
#
# In:   GR8 = p, GR9 = count
# Out:  GR8 = 0 if memset ran to completion, otherwise the number of bytes
#       from the faulting store's address to the end of the region
#
# - the caller's return address is parked in GR11 for the duration, as LR is
#   overwritten by the call to memset
# - __memset_user_error_lr labels the address memset returns to; presumably
#   the exception code compares LR against it to recognise a fault inside a
#   protected memset and then diverts to __memset_user_error_handler
#   (that code is not part of this file - confirm against the fault handler)
#
###############################################################################
        .globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
        .type		__memset_user,@function
__memset_user:
	movsg		lr,gr11				; GR11 = our own return address

	# abuse memset to do the dirty work
	or.p		gr9,gr9,gr10			; count becomes memset's 3rd argument
	setlos		#0,gr9				; fill byte is zero
	call		memset
__memset_user_error_lr:
	jmpl.p		@(gr11,gr0)			; return to our caller...
	setlos		#0,gr8				; ...reporting nothing left uncleared

	# deal any exception generated by memset
	# GR4  - memset's address tracking pointer
	# GR7  - memset's step value (index register for store insns)
	# GR8  - memset's original start address
	# GR10 - memset's original count
__memset_user_error_handler:
	add.p		gr4,gr7,gr4			; GR4 = address of the store that failed
	add		gr8,gr10,gr8			; GR8 = end of the requested region
	jmpl.p		@(gr11,gr0)
	sub		gr8,gr4,gr8		; we return the amount left uncleared

	.size		__memset_user, .-__memset_user
