/*	$OpenBSD: memcpy.S,v 1.1.1.1 2006/10/10 22:07:10 miod Exp $	*/
/*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * Entry-point selection.  The including build defines one of MEMCOPY,
 * MEMMOVE or BCOPY; when none is defined this file builds memcpy.
 */
#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
#define MEMCOPY
#endif

/*
 * Register roles.
 *
 * memcpy/memmove take (dst, src, len): dst arrives in r4 and src in r5,
 * and r3 (REG_DST0) keeps a copy of the original dst so it can be
 * returned.  bcopy takes (src, dst, len), so the two pointer registers
 * swap roles and REG_DST0 is left undefined (nothing is returned).
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define	REG_DST0	r3
#define	REG_SRC		r5
#define	REG_DST		r4
#else
#define	REG_SRC		r4
#define	REG_DST		r5
#endif

/* Byte count; the third argument in every variant. */
#define	REG_LEN		r6

/*
 * memcpy(dst, src, len) / memmove(dst, src, len) / bcopy(src, dst, len)
 *
 * A single body serves all three entry points; which name is emitted is
 * decided by the MEMCOPY / MEMMOVE / BCOPY build macro.
 *
 * In:    REG_SRC = source, REG_DST = destination, REG_LEN = byte count
 * Out:   r0 = original destination when REG_DST0 is defined
 *        (memcpy/memmove builds); no return value otherwise.
 * Uses:  r0 and r1 as scratch, REG_DST0 (when defined), and the T bit.
 *        REG_SRC, REG_DST and REG_LEN are modified.
 *
 * Direction is chosen purely by address order: if src > dst the copy
 * runs forward, if src < dst it runs backward starting from the ends of
 * both buffers, so overlapping buffers are handled in every variant.
 *
 * Unit size is chosen from (src ^ dst) & 3:
 *   0     -> both pointers can be longword aligned together: copy a
 *            leading byte/word if needed, then longwords, then bytes.
 *   2     -> both can be word aligned together: copy a leading byte if
 *            needed, then words, then bytes.
 *   1, 3  -> no common alignment: copy bytes only.
 *
 * NOTE: bt/s, bf/s, bra and rts are delayed branches: the instruction
 * that follows them is executed before the branch takes effect.  Many
 * instructions below are deliberately placed in such slots (marked
 * "delay slot"), so the instruction order must not be changed.
 */
#if defined(MEMCOPY)
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#elif defined(BCOPY)
ENTRY(bcopy)
#endif
#ifdef REG_DST0
	mov	REG_DST,REG_DST0	/* keep original dst for the return value */
#endif
	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
	bt/s	bcopy_return
	cmp/hi	REG_DST,REG_SRC	/* delay slot: T = ( src > dst ), unsigned */
	bf/s	bcopy_overlap	/* src < dst: copy backward */

	mov	REG_SRC,r0	/* delay slot of the bf/s above */
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1		/* r1 = (src ^ dst) & 3, reused at word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	word_align	/* non-zero: longword copy is not possible */

longword_align:
	tst	REG_LEN,REG_LEN	/* delay slot: if ( len==0 ) return;   */
	bt/s	bcopy_return


	mov	REG_SRC,r0	/* delay slot of the bt/s above */
	tst	#1,r0		/* if ( src & 1 )          */
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0	/* delay slot */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@REG_DST
	add	#2,REG_DST	/* }                       */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for no_align_delay */
2:
	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#4,REG_DST	/* delay slot: advance dst }                       */

	bra	no_align_delay	/* copy the remaining 0..3 bytes */
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) */


word_align:
	mov	r1,r0		/* r0 = (src ^ dst) & 3 */
	tst	#1,r0
	bf/s	no_align_delay	/* odd difference: byte copy only */
	tst	REG_LEN,REG_LEN	/* delay slot: if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0	/* if ( src & 1 )          */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for no_align_delay */
2:
	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#2,REG_DST	/* delay slot: advance dst }                       */


no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
no_align_delay:			/* entered with T = ( len == 0 ) already set */
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN	/*    len--;               */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST	/* delay slot: advance dst }                       */
bcopy_return:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0	/* delay slot: return the original dst */
#else
	nop			/* delay slot: nothing to return */
#endif


/*
 * Backward copy, used when src < dst.  Both pointers are first moved to
 * one past the end of their buffers; every step then pre-decrements.
 * The structure mirrors the forward path above.
 */
bcopy_overlap:
	add	REG_LEN,REG_SRC	/* src += len */
	add	REG_LEN,REG_DST	/* dst += len */

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1		/* r1 = (src ^ dst) & 3, reused at ov_word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	ov_word_align	/* non-zero: longword copy is not possible */

ov_longword_align:
	tst	REG_LEN,REG_LEN	/* delay slot: if ( len==0 ) return;   */
	bt/s	bcopy_return


	mov	REG_SRC,r0	/* delay slot of the bt/s above */
	tst	#1,r0		/* if ( src & 1 )          */
	bt	1f
	add	#-1,REG_SRC	/*    *--dst = *--src;     */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0	/* delay slot */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@-REG_DST	/* }                       */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.l	r0,@-REG_DST	/* delay slot: store }                       */

	bra	ov_no_align_delay	/* copy the remaining 0..3 bytes */
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) */


ov_word_align:
	mov	r1,r0		/* r0 = (src ^ dst) & 3 */
	tst	#1,r0
	bf/s	ov_no_align_delay	/* odd difference: byte copy only */
	tst	REG_LEN,REG_LEN	/* delay slot: if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0	/* if ( src & 1 )          */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN
	mov.b	r0,@-REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.w	r0,@-REG_DST	/* delay slot: store }                       */


ov_no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
ov_no_align_delay:		/* entered with T = ( len == 0 ) already set */
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN	/*    len--;               */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST	/* delay slot: store }                       */
9:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0	/* delay slot: return the original dst */
#else
	nop			/* delay slot: nothing to return */
#endif