/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memmove on Microblaze
 * This is generic C code to do efficient, alignment-aware memmove.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel).  Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for and errors which may appear in this program
 * not any responsibility to update it.
 */
26322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
27d64af918feb6cb81c396d6d2dabb738bc51dda3fMichal Simek#include <linux/export.h>
28322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/types.h>
29322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/stddef.h>
30322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/compiler.h>
31322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#include <linux/string.h>
32322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
33322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#ifdef __HAVE_ARCH_MEMMOVE
3493e2e85139509338c68279c7260ebb68177b23a9Michal Simek#ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memmove - copy @c bytes from @v_src to @v_dst; the regions may overlap.
 *
 * Plain byte-at-a-time variant, built when CONFIG_OPT_LIB_FUNCTION is off.
 *
 * Returns @v_dst.
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

	if (!c)
		return v_dst;

	/*
	 * The destination does not start above the source, so the copy is
	 * handed to memcpy().
	 * NOTE(review): for overlapping regions this depends on memcpy()
	 * copying from low to high addresses; that is not visible from this
	 * file - confirm against the memcpy() in use.
	 */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/*
	 * The destination starts above the source: walk down from one past
	 * the end of both regions so that no source byte is overwritten
	 * before it has been read.
	 */
	src += c;
	dst += c;

	while (c--)
		*--dst = *--src;

	return v_dst;
}
5793e2e85139509338c68279c7260ebb68177b23a9Michal Simek#else /* CONFIG_OPT_LIB_FUNCTION */
5893e2e85139509338c68279c7260ebb68177b23a9Michal Simekvoid *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
5993e2e85139509338c68279c7260ebb68177b23a9Michal Simek{
6093e2e85139509338c68279c7260ebb68177b23a9Michal Simek	const char *src = v_src;
6193e2e85139509338c68279c7260ebb68177b23a9Michal Simek	char *dst = v_dst;
6293e2e85139509338c68279c7260ebb68177b23a9Michal Simek	const uint32_t *i_src;
6393e2e85139509338c68279c7260ebb68177b23a9Michal Simek	uint32_t *i_dst;
6493e2e85139509338c68279c7260ebb68177b23a9Michal Simek
6593e2e85139509338c68279c7260ebb68177b23a9Michal Simek	if (!c)
6693e2e85139509338c68279c7260ebb68177b23a9Michal Simek		return v_dst;
6793e2e85139509338c68279c7260ebb68177b23a9Michal Simek
6893e2e85139509338c68279c7260ebb68177b23a9Michal Simek	/* Use memcpy when source is higher than dest */
6993e2e85139509338c68279c7260ebb68177b23a9Michal Simek	if (v_dst <= v_src)
7093e2e85139509338c68279c7260ebb68177b23a9Michal Simek		return memcpy(v_dst, v_src, c);
7193e2e85139509338c68279c7260ebb68177b23a9Michal Simek
72322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	/* The following code tries to optimize the copy by using unsigned
73322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	 * alignment. This will work fine if both source and destination are
74322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	 * aligned on the same boundary. However, if they are aligned on
75322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	 * different boundaries shifts will be necessary. This might result in
76322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	 * bad performance on MicroBlaze systems without a barrel shifter.
77322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	 */
78322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	/* FIXME this part needs more test */
79322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	/* Do a descending copy - this is a bit trickier! */
80322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	dst += c;
81322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	src += c;
82322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
83322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	if (c >= 4) {
84322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		unsigned  value, buf_hold;
85322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
8625985edcedea6396277003854657b5f3cb31a628Lucas De Marchi		/* Align the destination to a word boundary. */
8725985edcedea6396277003854657b5f3cb31a628Lucas De Marchi		/* This is done in an endian independent manner. */
88322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
89322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		switch ((unsigned long)dst & 3) {
90322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		case 3:
91322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			*--dst = *--src;
92322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			--c;
93322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		case 2:
94322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			*--dst = *--src;
95322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			--c;
96322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		case 1:
97322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			*--dst = *--src;
98322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			--c;
99322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		}
100322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
101322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		i_dst = (void *)dst;
102322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		/* Choose a copy scheme based on the source */
103322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		/* alignment relative to dstination. */
104322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		switch ((unsigned long)src & 3) {
105322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		case 0x0:	/* Both byte offsets are aligned */
106322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
107322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			i_src  = (const void *)src;
108322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
109322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			for (; c >= 4; c -= 4)
110322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				*--i_dst = *--i_src;
111322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
112322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			src  = (const void *)i_src;
113322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			break;
114322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		case 0x1:	/* Unaligned - Off by 1 */
115322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Word align the source */
116322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			i_src = (const void *) (((unsigned)src + 4) & ~3);
1171180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#ifndef __MICROBLAZEEL__
118322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Load the holding buffer */
119322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			buf_hold = *--i_src >> 24;
120322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
121322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			for (; c >= 4; c -= 4) {
122322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				value = *--i_src;
123322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				*--i_dst = buf_hold << 8 | value;
124322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				buf_hold = value >> 24;
125322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			}
1261180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#else
1271180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			/* Load the holding buffer */
1281180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			buf_hold = (*--i_src & 0xFF) << 24;
129322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
1301180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			for (; c >= 4; c -= 4) {
1311180b28ca82c529972bfd438467d5cd71cca5372Michal Simek				value = *--i_src;
1326bd55f0bbaebb79b39e147aa864401fd0c94db82Michal Simek				*--i_dst = buf_hold |
1336bd55f0bbaebb79b39e147aa864401fd0c94db82Michal Simek						((value & 0xFFFFFF00) >> 8);
1341180b28ca82c529972bfd438467d5cd71cca5372Michal Simek				buf_hold = (value  & 0xFF) << 24;
1351180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			}
1361180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#endif
137322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Realign the source */
138322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			src = (const void *)i_src;
139322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			src += 1;
140322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			break;
141322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		case 0x2:	/* Unaligned - Off by 2 */
142322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Word align the source */
143322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			i_src = (const void *) (((unsigned)src + 4) & ~3);
1441180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#ifndef __MICROBLAZEEL__
145322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Load the holding buffer */
146322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			buf_hold = *--i_src >> 16;
147322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
148322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			for (; c >= 4; c -= 4) {
149322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				value = *--i_src;
150322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				*--i_dst = buf_hold << 16 | value;
151322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				buf_hold = value >> 16;
152322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			}
1531180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#else
1541180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			/* Load the holding buffer */
1551180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			buf_hold = (*--i_src & 0xFFFF) << 16;
156322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
1571180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			for (; c >= 4; c -= 4) {
1581180b28ca82c529972bfd438467d5cd71cca5372Michal Simek				value = *--i_src;
1596bd55f0bbaebb79b39e147aa864401fd0c94db82Michal Simek				*--i_dst = buf_hold |
1606bd55f0bbaebb79b39e147aa864401fd0c94db82Michal Simek						((value & 0xFFFF0000) >> 16);
1611180b28ca82c529972bfd438467d5cd71cca5372Michal Simek				buf_hold = (value & 0xFFFF) << 16;
1621180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			}
1631180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#endif
164322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Realign the source */
165322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			src = (const void *)i_src;
166322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			src += 2;
167322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			break;
168322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		case 0x3:	/* Unaligned - Off by 3 */
169322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Word align the source */
170322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			i_src = (const void *) (((unsigned)src + 4) & ~3);
1711180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#ifndef __MICROBLAZEEL__
172322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Load the holding buffer */
173322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			buf_hold = *--i_src >> 8;
174322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
175322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			for (; c >= 4; c -= 4) {
176322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				value = *--i_src;
177322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				*--i_dst = buf_hold << 24 | value;
178322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek				buf_hold = value >> 8;
179322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			}
1801180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#else
1811180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			/* Load the holding buffer */
1821180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			buf_hold = (*--i_src & 0xFFFFFF) << 8;
183322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
1841180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			for (; c >= 4; c -= 4) {
1851180b28ca82c529972bfd438467d5cd71cca5372Michal Simek				value = *--i_src;
1866bd55f0bbaebb79b39e147aa864401fd0c94db82Michal Simek				*--i_dst = buf_hold |
1876bd55f0bbaebb79b39e147aa864401fd0c94db82Michal Simek						((value & 0xFF000000) >> 24);
188473ff6609c0778c5939164c5c57676b74164be71Joe Perches				buf_hold = (value & 0xFFFFFF) << 8;
1891180b28ca82c529972bfd438467d5cd71cca5372Michal Simek			}
1901180b28ca82c529972bfd438467d5cd71cca5372Michal Simek#endif
191322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			/* Realign the source */
192322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			src = (const void *)i_src;
193322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			src += 3;
194322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek			break;
195322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		}
196322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		dst = (void *)i_dst;
197322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	}
198322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek
19925985edcedea6396277003854657b5f3cb31a628Lucas De Marchi	/* simple fast copy, ... unless a cache boundary is crossed */
200322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	/* Finish off any remaining bytes */
201322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	switch (c) {
202322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	case 4:
203322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		*--dst = *--src;
204322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	case 3:
205322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		*--dst = *--src;
206322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	case 2:
207322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		*--dst = *--src;
208322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	case 1:
209322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek		*--dst = *--src;
210322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	}
211322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek	return v_dst;
212322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek}
21393e2e85139509338c68279c7260ebb68177b23a9Michal Simek#endif /* CONFIG_OPT_LIB_FUNCTION */
214322ae8eb91c1730728400c5b8dd1108aef1205b8Michal SimekEXPORT_SYMBOL(memmove);
215322ae8eb91c1730728400c5b8dd1108aef1205b8Michal Simek#endif /* __HAVE_ARCH_MEMMOVE */
216