memmove.c revision 322ae8eb91c1730728400c5b8dd1108aef1205b8
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memmove on Microblaze
 * This is generic C code to do efficient, alignment-aware memmove.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel).  Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for and errors which may appear in this program
 * not any responsibility to update it.
 */
26322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
27322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/types.h>
28322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/stddef.h>
29322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/compiler.h>
30322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/module.h>
31322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/string.h>
32322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
#ifdef __HAVE_ARCH_MEMMOVE
/*
 * memmove - copy @c bytes from @v_src to @v_dst; the regions may overlap.
 *
 * Returns @v_dst.
 *
 * When the destination is at or below the source the copy is handed to
 * memcpy().  NOTE(review): that is only overlap-safe if the memcpy() in use
 * copies in ascending address order - confirm against its implementation.
 * Otherwise the bytes are copied in descending address order, so that the
 * part of the source overlapped by the destination is read before it is
 * overwritten.
 *
 * Without CONFIG_OPT_LIB_FUNCTION the descending copy is done one byte at a
 * time.  With it, the destination is first brought down to a 32-bit boundary
 * and the bulk is moved one aligned word per iteration.  A source that is
 * misaligned relative to the destination is handled by loading aligned source
 * words and merging each pair of neighbouring words with constant shifts.
 * Because whole aligned words are loaded, bytes that share an aligned word
 * with either end of the source range may be read (never stored).
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

#ifdef CONFIG_OPT_LIB_FUNCTION
	const uint32_t *i_src;
	uint32_t *i_dst;
#endif

	if (!c)
		return v_dst;

	/* Use memcpy when source is higher than (or equal to) dest */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

#ifndef CONFIG_OPT_LIB_FUNCTION
	/* copy backwards, from end to beginning */
	src += c;
	dst += c;

	/* Simple, byte oriented memmove. */
	while (c--)
		*--dst = *--src;

	return v_dst;
#else
	/*
	 * The following code tries to optimize the copy by using word
	 * accesses. This works fine if both source and destination are
	 * aligned on the same boundary. However, if they are aligned on
	 * different boundaries shifts are necessary. This might result in
	 * bad performance on MicroBlaze systems without a barrel shifter,
	 * which is why every case below uses its own constant shift counts.
	 */
	/* Do a descending copy: both pointers start one past the last byte. */
	dst += c;
	src += c;

	if (c >= 4) {
		uint32_t value, buf_hold;

		/*
		 * Align the destination to a word boundary by copying the
		 * 0-3 topmost bytes individually (endian independent).
		 */
		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
			/* fall through */
		case 2:
			*--dst = *--src;
			--c;
			/* fall through */
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Choose a copy scheme based on the source alignment relative
		 * to the (now word aligned) destination.
		 *
		 * In the misaligned cases buf_hold carries the bytes of the
		 * previously loaded (higher) source word that belong to the
		 * upper addresses of the next destination word, already
		 * shifted into their final position; the remaining bytes come
		 * from the next lower source word.
		 *
		 * NOTE(review): the little-endian variants are selected with
		 * __MICROBLAZEEL__, assumed to be predefined by the toolchain
		 * for little-endian MicroBlaze builds - confirm.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Point just above the word holding the last byte */
			i_src = (const void *)
				(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 1 byte from the top word */
			buf_hold = *--i_src >> 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 8 | buf_hold;
				buf_hold = value >> 24;
			}
#else
			/* Load the holding buffer: 1 byte from the top word */
			buf_hold = *--i_src << 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold | value >> 8;
				buf_hold = value << 24;
			}
#endif
			/* Realign the source: 1 held byte is still uncopied */
			src = (const void *)i_src;
			src += 1;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Point just above the word holding the last byte */
			i_src = (const void *)
				(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 2 bytes from the top word */
			buf_hold = *--i_src >> 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 16 | buf_hold;
				buf_hold = value >> 16;
			}
#else
			/* Load the holding buffer: 2 bytes from the top word */
			buf_hold = *--i_src << 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold | value >> 16;
				buf_hold = value << 16;
			}
#endif
			/* Realign the source: 2 held bytes are still uncopied */
			src = (const void *)i_src;
			src += 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Point just above the word holding the last byte */
			i_src = (const void *)
				(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 3 bytes from the top word */
			buf_hold = *--i_src >> 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 24 | buf_hold;
				buf_hold = value >> 8;
			}
#else
			/* Load the holding buffer: 3 bytes from the top word */
			buf_hold = *--i_src << 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold | value >> 24;
				buf_hold = value << 8;
			}
#endif
			/* Realign the source: 3 held bytes are still uncopied */
			src = (const void *)i_src;
			src += 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off any remaining bytes. At most 3 can be left here: either
	 * c was below 4 from the start, or the word loops above ran until it
	 * dropped below 4.
	 */
	switch (c) {
	case 3:
		*--dst = *--src;
		/* fall through */
	case 2:
		*--dst = *--src;
		/* fall through */
	case 1:
		*--dst = *--src;
	}
	return v_dst;
#endif
}
EXPORT_SYMBOL(memmove);
#endif /* __HAVE_ARCH_MEMMOVE */
176