1/* memcpy.S: optimised assembly memcpy
2 *
3 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12
13        .text
14        .p2align	4
15
16###############################################################################
17#
18# void *memcpy(void *to, const char *from, size_t count)
19#
20# - NOTE: must not use any stack. exception detection performs function return
21#         to caller's fixup routine, aborting the remainder of the copy
22#
23###############################################################################
24        .globl		memcpy,__memcpy_end
25        .type		memcpy,@function
26memcpy:
	# Register roles, as used by the code below:
	#   gr8     - 'to', only ever read here, so it is unchanged on return
	#   gr9     - 'from', rebased to (from - step) and used as the load base
	#   gr10    - 'count', reduced by the step on every loop iteration
	#   gr3     - working store base, initialised to (to - step)
	#   gr11    - step, used as the index register of every load and store
	#   gr4-gr7 - alignment scratch, then the data being copied
	#             NOTE(review): the double-word forms presumably fill the
	#             gr4/gr5 and gr6/gr7 register pairs - confirm
	#
	# gr4 = to | from | count, so a low bit of gr4 is clear only when it is
	# clear in all three values.  icc3 is set from count alone and beqlr
	# returns straight away when count is zero.
	or.p		gr8,gr9,gr4
	orcc		gr10,gr0,gr0,icc3
	or.p		gr10,gr4,gr4
	beqlr		icc3,#0
31
	# optimise based on best common alignment for to, from & count
	# - the masks are tested widest first (16, 8, 4, then 2 bytes)
	# - gr11 is preloaded with 8 because memcpy_8 and memcpy_16 use it
	#   without setting it themselves, and it is replaced by 1 ahead of
	#   the byte loop (memcpy_2 and memcpy_4 load their own step)
	andicc.p	gr4,#0x0f,gr0,icc0
	setlos		#8,gr11
	andicc.p	gr4,#0x07,gr0,icc1
	beq		icc0,#0,memcpy_16
	andicc.p	gr4,#0x03,gr0,icc0
	beq		icc1,#0,memcpy_8
	andicc.p	gr4,#0x01,gr0,icc1
	beq		icc0,#0,memcpy_4
	setlos.p	#1,gr11
	beq		icc1,#0,memcpy_2
43
	# do byte by byte copy
	# - both bases are backed off by one step first because every access
	#   addresses base + gr11
	#   NOTE(review): the 'u' load/store forms are relied on to write the
	#   accessed address back into the base register - confirm against
	#   the FR-V manual
	# - the loop repeats until count reaches exactly zero
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
470:	ldubu.p		@(gr9,gr11),gr4
	subicc		gr10,#1,gr10,icc0
	stbu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr
52
	# do halfword by halfword copy
	# - reached only when to, from and count are all even, so subtracting
	#   2 per iteration lands exactly on zero
54memcpy_2:
	setlos		#2,gr11
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
580:	lduhu.p		@(gr9,gr11),gr4
	subicc		gr10,#2,gr10,icc0
	sthu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr
63
	# do word by word copy
	# - reached only when to, from and count are all multiples of 4
65memcpy_4:
	setlos		#4,gr11
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
690:	ldu.p		@(gr9,gr11),gr4
	subicc		gr10,#4,gr10,icc0
	stu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr
74
	# do double-word by double-word copy
	# - gr11 still holds the 8 loaded during the alignment tests
76memcpy_8:
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
790:	lddu.p		@(gr9,gr11),gr4
	subicc		gr10,#8,gr10,icc0
	stdu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr
84
	# do quad-word by quad-word copy
	# - gr11 is still 8: each iteration issues two loads and two stores
	#   with that step and takes 16 off count
86memcpy_16:
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
890:	lddu		@(gr9,gr11),gr4
	lddu.p		@(gr9,gr11),gr6
	subicc		gr10,#16,gr10,icc0
	stdu		gr4,@(gr3,gr11)
	stdu.p		gr6,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr
	# end-of-code marker: exported by the .globl above and used by the
	# .size directive below
	# NOTE(review): presumably also lets fault handling test whether a PC
	# lies inside memcpy - the users are outside this file
96__memcpy_end:
97
	.size		memcpy, __memcpy_end-memcpy
99
100###############################################################################
101#
102# copy to/from userspace
103# - return the number of bytes that could not be copied (0 on complete success)
104#
105# long __memcpy_user(void *dst, const void *src, size_t count)
106#
107###############################################################################
108        .globl		__memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
109        .type		__memcpy_user,@function
110__memcpy_user:
	# build an 8-byte frame that the error handler can read back:
	#   sp+0 = dst + count (the expected end address, from gr6)
	#   sp+4 = the caller's return address (LR copied into gr7)
	# the single stdi of gr6 is what fills both slots, as the loads from
	# sp+4 and sp+0 further down show
	movsg		lr,gr7
	subi.p		sp,#8,sp
	add		gr8,gr10,gr6		; calculate expected end address
	stdi		gr6,@(sp,#0)
115
	# abuse memcpy to do the dirty work
	call		memcpy
	# success path: memcpy returned normally, so nothing was left uncopied
	# - reload the saved return address, return 0 in gr8 and drop the frame
	# - the label below is exported
	#   NOTE(review): presumably so the exception code can recognise this
	#   call site from LR - the users are outside this file
118__memcpy_user_error_lr:
	ldi.p		@(sp,#4),gr7
	setlos		#0,gr8
	jmpl.p		@(gr7,gr0)
	addi		sp,#8,sp
123
	# deal with any exception generated by memcpy
	# GR3 - memcpy's working dest base (GR3 + GR11 is where the next store goes)
	# GR11 - memcpy's step value (index register for store insns)
	#
	# returns in GR8 the number of bytes not copied:
	#   (dst + count) - (GR3 + GR11)
	# then drops the frame and returns to the address saved at sp+4
	# NOTE(review): nothing in this file branches here - presumably entered
	# from the exception fixup code when memcpy faults - confirm
127__memcpy_user_error_handler:
	lddi.p		@(sp,#0),gr4		; load GR4 with dst+count, GR5 with ret addr
	add		gr11,gr3,gr7
	sub.p		gr4,gr7,gr8
131
	addi		sp,#8,sp
	jmpl		@(gr5,gr0)
134
	.size		__memcpy_user, .-__memcpy_user
136