/*
 *    Optimized memory copy routines.
 *
 *    Copyright (C) 2004 Randolph Chung <tausq@debian.org>
 *    Copyright (C) 2013 Helge Deller <deller@gmx.de>
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *    Portions derived from the GNU C Library
 *    Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
 *
 * Several strategies are used to get the best performance for various
 * conditions. In the optimal case, we copy 64 bytes in an unrolled loop using
 * fp regs. This is followed by loops that copy 32 or 16 bytes at a time using
 * general registers.  Unaligned copies are handled either by aligning the
 * destination and then using a shift-and-write method, or in a few cases by
 * falling back to a byte-at-a-time copy.
 *
 * I chose to implement this in C because it is easier to maintain and debug,
 * and in my experiments it appears that the C code generated by gcc (3.3/3.4
 * at the time of writing) is fairly optimal. Unfortunately some of the
 * semantics of the copy routine (exception handling) are difficult to express
 * in C, so we have to play some tricks to get it to work.
 *
 * All the loads and stores are done via explicit asm() code in order to use
 * the right space registers.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version
 * of memcpy (written in C) is actually quite fast already. This routine is
 * able to beat it by 30-40% for aligned copies because of the loop unrolling,
 * but in some cases the glibc version is still slightly faster. This lends
 * more credibility to the idea that gcc can generate very good code as long
 * as we are careful.
 *
 * TODO:
 * - cache prefetching needs more experimentation to get optimal settings
 * - try not to use the post-increment address modifiers; they create additional
 *   interlocks
 * - replace byte-copy loops with stbys sequences
 */
551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#ifdef __KERNEL__
571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/module.h>
581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/compiler.h>
59db080f9c530f78dad661257885a1893506077068Helge Deller#include <linux/uaccess.h>
601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define s_space "%%sr1"
611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define d_space "%%sr2"
621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#else
631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include "memcpy.h"
641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define s_space "%%sr0"
651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define d_space "%%sr0"
661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define pa_memcpy new2_copy
671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#endif
681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus TorvaldsDECLARE_PER_CPU(struct exception_data, exception_data);
701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * preserve_branch(label) - keep the C label @label alive in gcc's eyes.
 *
 * Expands to a conditional "goto label" whose condition compares a
 * volatile local with itself.  The comparison is always false at run
 * time, but because the variable is volatile gcc has to emit both reads
 * and cannot prove the branch dead, so the code behind @label is kept.
 *
 * Must be used as a statement inside the function that defines @label.
 */
#define preserve_branch(label)	do {					\
	volatile int dummy = 0;						\
	/* The following branch is never taken, it's just here to  */	\
	/* prevent gcc from optimizing away our exception code. */ 	\
	if (unlikely(dummy != dummy))					\
		goto label;						\
} while (0)
781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * get_user_space() - space id to use for user-side accesses.
 * Evaluates to 0 when get_fs() compares equal to KERNEL_DS, otherwise to
 * the value returned by mfsp(3).
 * NOTE(review): mfsp(3) presumably reads space register %sr3 - confirm.
 *
 * get_kernel_space() - space id for kernel-side accesses; always 0.
 */
#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
#define get_kernel_space() (0)
811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * MERGE(w0, sh_1, w1, sh_2) - build one output word from two input words.
 *
 * Writes sh_2 to the shift amount register with "mtsar", then issues
 * "shrpw w0, w1, %sar" and yields the result as an unsigned int.
 *
 * sh_1 is accepted but never referenced in the expansion; only sh_2
 * reaches the hardware.
 *
 * NOTE(review): shrpw presumably shifts the 64-bit pair w0:w1 right by
 * %sar and keeps the low word - confirm against the PA-RISC manual.
 * NOTE(review): %sar is modified but the asm has no clobber list -
 * confirm that gcc cannot be holding a live value in it here.
 */
#define MERGE(w0, sh_1, w1, sh_2)  ({					\
	unsigned int _r;						\
	asm volatile (							\
	"mtsar %3\n"							\
	"shrpw %1, %2, %%sar, %0\n"					\
	: "=r"(_r)							\
	: "r"(w0), "r"(w1), "r"(sh_2)					\
	);								\
	_r;								\
})
/* Copies shorter than this many bytes skip straight to the byte-copy path. */
#define THRESHOLD	16
931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * DPRINTF(fmt, args...) - debug trace, active only with DEBUG_MEMCPY.
 *
 * When enabled, logs "<file>:<line>:<function> " followed by the
 * formatted message at KERN_DEBUG level.  The location prefix and the
 * message are emitted by a single printk() so they stay together in one
 * log record; @fmt must therefore be a string literal (it is pasted onto
 * the prefix).
 *
 * When disabled it expands to an empty statement that is still safe in
 * unbraced if/else bodies.
 */
#ifdef DEBUG_MEMCPY
#define DPRINTF(fmt, args...)						\
	printk(KERN_DEBUG "%s:%d:%s " fmt, __FILE__, __LINE__, __func__, ##args)
#else
#define DPRINTF(fmt, args...)	do { } while (0)
#endif
991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * def_load_ai_insn - emit one load that also advances its address operand.
 *
 * @_insn: load mnemonic (pasted verbatim)
 * @_sz:   displacement used with the ",ma" completer
 * @_tt:   constraint string for the loaded value (e.g. "=r", "=f")
 * @_s:    space register string (s_space / d_space)
 * @_a:    address variable; bound as "+r", so the instruction updates it
 * @_t:    variable that receives the loaded value
 * @_e:    assembler label to register in the exception table for this load
 *
 * Generates "1: <_insn>,ma <_sz>(<_s>,<_a>), <_t>" plus an exception
 * table entry pairing local label 1 with @_e.
 *
 * r8 is declared clobbered.
 * NOTE(review): presumably the fault fixup path writes r8 - confirm.
 */
#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)	\
	__asm__ __volatile__ (				\
	"1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t"	\
	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
	: _tt(_t), "+r"(_a)				\
	: 						\
	: "r8")
1071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * def_store_ai_insn - emit one store that also advances its address operand.
 *
 * @_insn: store mnemonic (pasted verbatim)
 * @_sz:   displacement used with the ",ma" completer
 * @_tt:   constraint string for the stored value (e.g. "r", "f")
 * @_s:    space register string (s_space / d_space)
 * @_a:    address variable; bound as "+r", so the instruction updates it
 * @_t:    value to store
 * @_e:    assembler label to register in the exception table for this store
 *
 * Generates "1: <_insn>,ma <_t>, <_sz>(<_s>,<_a>)" plus an exception
 * table entry pairing local label 1 with @_e.
 *
 * r8 is declared clobbered.
 * NOTE(review): presumably the fault fixup path writes r8 - confirm.
 */
#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) 	\
	__asm__ __volatile__ (				\
	"1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t"	\
	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
	: "+r"(_a) 					\
	: _tt(_t)					\
	: "r8")
1151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * Address-advancing accessors built on def_load_ai_insn /
 * def_store_ai_insn.  Each one moves a single item and steps the address
 * variable by the item size:
 *
 *   ldbma / stbma  - byte   (ldbs / stbs, step 1)
 *   ldwma / stwma  - word   (ldw  / stw,  step 4)
 *   flddma / fstdma - double (fldd / fstd, step 8, "f" register class)
 *
 * Argument order differs between the two directions:
 *   loads:  (space, address, target, fault label)
 *   stores: (space, value, address, fault label)
 */
#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
1221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * def_load_insn - emit one load at a fixed offset; the address is not
 * modified (it is an input-only "r" operand).
 *
 * @_insn: load mnemonic (pasted verbatim)
 * @_tt:   constraint string for the loaded value
 * @_s:    space register string
 * @_o:    constant displacement from @_a
 * @_a:    base address
 * @_t:    variable that receives the loaded value
 * @_e:    assembler label to register in the exception table for this load
 *
 * Generates "1: <_insn> <_o>(<_s>,<_a>), <_t>" plus an exception table
 * entry pairing local label 1 with @_e.  r8 is declared clobbered.
 */
#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) 	\
	__asm__ __volatile__ (				\
	"1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t"	\
	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
	: _tt(_t) 					\
	: "r"(_a)					\
	: "r8")
1301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * def_store_insn - emit one store at a fixed offset; the address is not
 * modified and the asm has no output operands.
 *
 * @_insn: store mnemonic (pasted verbatim)
 * @_tt:   constraint string for the stored value
 * @_s:    space register string
 * @_t:    value to store
 * @_o:    constant displacement from @_a
 * @_a:    base address
 * @_e:    assembler label to register in the exception table for this store
 *
 * Generates "1: <_insn> <_t>, <_o>(<_s>,<_a>)" plus an exception table
 * entry pairing local label 1 with @_e.  r8 is declared clobbered.
 */
#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) 	\
	__asm__ __volatile__ (				\
	"1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" 	\
	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\
	: 						\
	: _tt(_t), "r"(_a)				\
	: "r8")
1381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * Word accessors at a fixed offset (address left unchanged):
 *   ldw(space, offset, address, target, fault label)
 *   stw(space, value, offset, address, fault label)
 */
#define ldw(_s,_o,_a,_t,_e)	def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
#define stw(_s,_t,_o,_a,_e) 	def_store_insn(stw,"r",_s,_t,_o,_a,_e)
1411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * prefetch_src() / prefetch_dst() - touch @addr in the source or
 * destination space with a load whose target is %r0.
 *
 * Without CONFIG_PREFETCH both compile to empty statements.
 *
 * NOTE(review): a load into %r0 is presumably treated by the CPU as a
 * cache prefetch hint with no architectural result - confirm.
 * NOTE(review): the source variant uses "ldw" while the destination
 * variant uses "ldd"; confirm the asymmetry is intentional.
 */
#ifdef  CONFIG_PREFETCH
static inline void prefetch_src(const void *addr)
{
	__asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
}

static inline void prefetch_dst(const void *addr)
{
	__asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
}
#else
#define prefetch_src(addr) do { } while(0)
#define prefetch_dst(addr) do { } while(0)
#endif
1561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * Status codes returned by the internal copy helpers:
 * success, fault while loading, fault while storing.
 */
#define PA_MEMCPY_OK		0
#define PA_MEMCPY_LOAD_ERROR	1
#define PA_MEMCPY_STORE_ERROR	2
1605b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
1611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
1621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * per loop.  This code is derived from glibc.
1631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds */
1649af63aedb84c4281cde1610280246f7749c27799Helge Dellerstatic noinline unsigned long copy_dstaligned(unsigned long dst,
1655b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller					unsigned long src, unsigned long len)
1661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
1671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* gcc complains that a2 and a3 may be uninitialized, but actually
1681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	 * they cannot be.  Initialize a2/a3 to shut gcc up.
1691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	 */
1701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	register unsigned int a0, a1, a2 = 0, a3 = 0;
1711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	int sh_1, sh_2;
1721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* prefetch_src((const void *)src); */
1741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* Calculate how to shift a word read at the memory operation
1761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	   aligned srcp to make it aligned for copy.  */
1771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	sh_1 = 8 * (src % sizeof(unsigned int));
1781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	sh_2 = 8 * sizeof(unsigned int) - sh_1;
1791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* Make src aligned by rounding it down.  */
1811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	src &= -sizeof(unsigned int);
1821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	switch (len % 4)
1841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	{
1851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		case 2:
1861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			/* a1 = ((unsigned int *) src)[0];
1871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			   a2 = ((unsigned int *) src)[1]; */
1881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 0, src, a1, cda_ldw_exc);
1891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 4, src, a2, cda_ldw_exc);
1901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			src -= 1 * sizeof(unsigned int);
1911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			dst -= 3 * sizeof(unsigned int);
1921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			len += 2;
1931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			goto do1;
1941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		case 3:
1951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			/* a0 = ((unsigned int *) src)[0];
1961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			   a1 = ((unsigned int *) src)[1]; */
1971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 0, src, a0, cda_ldw_exc);
1981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 4, src, a1, cda_ldw_exc);
1991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			src -= 0 * sizeof(unsigned int);
2001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			dst -= 2 * sizeof(unsigned int);
2011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			len += 1;
2021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			goto do2;
2031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		case 0:
2041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			if (len == 0)
2055b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller				return PA_MEMCPY_OK;
2061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			/* a3 = ((unsigned int *) src)[0];
2071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			   a0 = ((unsigned int *) src)[1]; */
2081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 0, src, a3, cda_ldw_exc);
2091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 4, src, a0, cda_ldw_exc);
2101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			src -=-1 * sizeof(unsigned int);
2111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			dst -= 1 * sizeof(unsigned int);
2121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			len += 0;
2131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			goto do3;
2141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		case 1:
2151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			/* a2 = ((unsigned int *) src)[0];
2161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			   a3 = ((unsigned int *) src)[1]; */
2171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 0, src, a2, cda_ldw_exc);
2181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldw(s_space, 4, src, a3, cda_ldw_exc);
2191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			src -=-2 * sizeof(unsigned int);
2201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			dst -= 0 * sizeof(unsigned int);
2211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			len -= 1;
2221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			if (len == 0)
2231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				goto do0;
2241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			goto do4;			/* No-op.  */
2251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
2261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	do
2281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	{
2291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
2301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsdo4:
2311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* a0 = ((unsigned int *) src)[0]; */
2321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldw(s_space, 0, src, a0, cda_ldw_exc);
2331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
2341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
2351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsdo3:
2361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* a1 = ((unsigned int *) src)[1]; */
2371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldw(s_space, 4, src, a1, cda_ldw_exc);
2381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
2391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
2401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsdo2:
2411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* a2 = ((unsigned int *) src)[2]; */
2421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldw(s_space, 8, src, a2, cda_ldw_exc);
2431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
2441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
2451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsdo1:
2461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* a3 = ((unsigned int *) src)[3]; */
2471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldw(s_space, 12, src, a3, cda_ldw_exc);
2481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
2491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
2501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		src += 4 * sizeof(unsigned int);
2521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		dst += 4 * sizeof(unsigned int);
2531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		len -= 4;
2541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
2551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	while (len != 0);
2561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsdo0:
2581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
2591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
2601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	preserve_branch(handle_load_error);
2621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	preserve_branch(handle_store_error);
2631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2645b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	return PA_MEMCPY_OK;
2651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldshandle_load_error:
2671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	__asm__ __volatile__ ("cda_ldw_exc:\n");
2685b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	return PA_MEMCPY_LOAD_ERROR;
2691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldshandle_store_error:
2711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	__asm__ __volatile__ ("cda_stw_exc:\n");
2725b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	return PA_MEMCPY_STORE_ERROR;
2731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
2741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2765b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
2775b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller * In case of an access fault the faulty address can be read from the per_cpu
2785b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller * exception data struct. */
2799af63aedb84c4281cde1610280246f7749c27799Helge Dellerstatic noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
2805b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller					unsigned long len)
2811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
2821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	register unsigned long src, dst, t1, t2, t3;
2831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	register unsigned char *pcs, *pcd;
2841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	register unsigned int *pws, *pwd;
2851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	register double *pds, *pdd;
2865b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	unsigned long ret;
2871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	src = (unsigned long)srcp;
2891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	dst = (unsigned long)dstp;
2901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pcs = (unsigned char *)srcp;
2911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pcd = (unsigned char *)dstp;
2921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* prefetch_src((const void *)srcp); */
2941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	if (len < THRESHOLD)
2961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		goto byte_copy;
2971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* Check alignment */
2991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	t1 = (src ^ dst);
3001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	if (unlikely(t1 & (sizeof(double)-1)))
3011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		goto unaligned_copy;
3021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* src and dst have same alignment. */
3041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* Copy bytes till we are double-aligned. */
3061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	t2 = src & (sizeof(double) - 1);
3071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	if (unlikely(t2 != 0)) {
3081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		t2 = sizeof(double) - t2;
3091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		while (t2 && len) {
3101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			/* *pcd++ = *pcs++; */
3111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldbma(s_space, pcs, t3, pmc_load_exc);
3121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			len--;
3131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			stbma(d_space, t3, pcd, pmc_store_exc);
3141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			t2--;
3151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		}
3161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
3171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pds = (double *)pcs;
3191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pdd = (double *)pcd;
3201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
321fa681a1800a58234afe4d876c1752c0751826d22Randolph Chung#if 0
3221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* Copy 8 doubles at a time */
3231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	while (len >= 8*sizeof(double)) {
3241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		register double r1, r2, r3, r4, r5, r6, r7, r8;
3251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* prefetch_src((char *)pds + L1_CACHE_BYTES); */
3261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r1, pmc_load_exc);
3271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r2, pmc_load_exc);
3281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r3, pmc_load_exc);
3291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r4, pmc_load_exc);
3301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r1, pdd, pmc_store_exc);
3311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r2, pdd, pmc_store_exc);
3321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r3, pdd, pmc_store_exc);
3331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r4, pdd, pmc_store_exc);
3341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#if 0
3361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		if (L1_CACHE_BYTES <= 32)
3371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			prefetch_src((char *)pds + L1_CACHE_BYTES);
3381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#endif
3391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r5, pmc_load_exc);
3401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r6, pmc_load_exc);
3411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r7, pmc_load_exc);
3421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		flddma(s_space, pds, r8, pmc_load_exc);
3431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r5, pdd, pmc_store_exc);
3441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r6, pdd, pmc_store_exc);
3451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r7, pdd, pmc_store_exc);
3461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		fstdma(d_space, r8, pdd, pmc_store_exc);
3471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		len -= 8*sizeof(double);
3481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
349fa681a1800a58234afe4d876c1752c0751826d22Randolph Chung#endif
3501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pws = (unsigned int *)pds;
3521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pwd = (unsigned int *)pdd;
3531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsword_copy:
3551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	while (len >= 8*sizeof(unsigned int)) {
3561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
3571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* prefetch_src((char *)pws + L1_CACHE_BYTES); */
3581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r1, pmc_load_exc);
3591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r2, pmc_load_exc);
3601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r3, pmc_load_exc);
3611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r4, pmc_load_exc);
3621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r1, pwd, pmc_store_exc);
3631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r2, pwd, pmc_store_exc);
3641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r3, pwd, pmc_store_exc);
3651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r4, pwd, pmc_store_exc);
3661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r5, pmc_load_exc);
3681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r6, pmc_load_exc);
3691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r7, pmc_load_exc);
3701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r8, pmc_load_exc);
3711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r5, pwd, pmc_store_exc);
3721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r6, pwd, pmc_store_exc);
3731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r7, pwd, pmc_store_exc);
3741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r8, pwd, pmc_store_exc);
3751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		len -= 8*sizeof(unsigned int);
3761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
3771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	while (len >= 4*sizeof(unsigned int)) {
3791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		register unsigned int r1,r2,r3,r4;
3801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r1, pmc_load_exc);
3811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r2, pmc_load_exc);
3821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r3, pmc_load_exc);
3831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldwma(s_space, pws, r4, pmc_load_exc);
3841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r1, pwd, pmc_store_exc);
3851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r2, pwd, pmc_store_exc);
3861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r3, pwd, pmc_store_exc);
3871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stwma(d_space, r4, pwd, pmc_store_exc);
3881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		len -= 4*sizeof(unsigned int);
3891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
3901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pcs = (unsigned char *)pws;
3921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pcd = (unsigned char *)pwd;
3931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsbyte_copy:
3951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	while (len) {
3961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* *pcd++ = *pcs++; */
3971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		ldbma(s_space, pcs, t3, pmc_load_exc);
3981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		stbma(d_space, t3, pcd, pmc_store_exc);
3991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		len--;
4001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
4011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4025b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	return PA_MEMCPY_OK;
4031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunaligned_copy:
4051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* possibly we are aligned on a word, but not on a double... */
40687451d850c895470a122308086069b7c326c914bRandolph Chung	if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
4071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		t2 = src & (sizeof(unsigned int) - 1);
4081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		if (unlikely(t2 != 0)) {
4101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			t2 = sizeof(unsigned int) - t2;
4111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			while (t2) {
4121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				/* *pcd++ = *pcs++; */
4131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				ldbma(s_space, pcs, t3, pmc_load_exc);
4141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				stbma(d_space, t3, pcd, pmc_store_exc);
4151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				len--;
4161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				t2--;
4171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			}
4181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		}
4191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		pws = (unsigned int *)pcs;
4211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		pwd = (unsigned int *)pcd;
4221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		goto word_copy;
4231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
4241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* Align the destination.  */
4261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
4271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
4281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		while (t2) {
4291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			/* *pcd++ = *pcs++; */
4301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			ldbma(s_space, pcs, t3, pmc_load_exc);
4311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			stbma(d_space, t3, pcd, pmc_store_exc);
4321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			len--;
4331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			t2--;
4341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		}
4351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		dst = (unsigned long)pcd;
4361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		src = (unsigned long)pcs;
4371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	}
4381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4395b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
4401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	if (ret)
4411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		return ret;
4421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pcs += (len & -sizeof(unsigned int));
4441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	pcd += (len & -sizeof(unsigned int));
4451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	len %= sizeof(unsigned int);
4461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	preserve_branch(handle_load_error);
4481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	preserve_branch(handle_store_error);
4491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	goto byte_copy;
4511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldshandle_load_error:
4531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	__asm__ __volatile__ ("pmc_load_exc:\n");
4545b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	return PA_MEMCPY_LOAD_ERROR;
4551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldshandle_store_error:
4571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	__asm__ __volatile__ ("pmc_store_exc:\n");
4585b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	return PA_MEMCPY_STORE_ERROR;
4595b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller}
4605b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
4615b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
4625b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
4635b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Dellerstatic unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
4645b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller{
4655b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	unsigned long ret, fault_addr, reference;
4665b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	struct exception_data *d;
4675b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
4685b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	ret = pa_memcpy_internal(dstp, srcp, len);
4695b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	if (likely(ret == PA_MEMCPY_OK))
4705b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller		return 0;
4715b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
4725b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	/* if a load or store fault occured we can get the faulty addr */
473496252f787560df18a65fdc74dc3180f7cd2c723Christoph Lameter	d = this_cpu_ptr(&exception_data);
4745b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	fault_addr = d->fault_addr;
4755b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
4765b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	/* error in load or store? */
4775b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	if (ret == PA_MEMCPY_LOAD_ERROR)
4785b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller		reference = (unsigned long) srcp;
4795b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	else
4805b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller		reference = (unsigned long) dstp;
4815b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
4825b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
4835b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller		ret, len, fault_addr, reference);
4845b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller
4855b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	if (fault_addr >= reference)
4865b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller		return len - (fault_addr - reference);
4875b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller	else
4885b879d78bc0818aa710f5d4d9abbfc2aca075cc3Helge Deller		return len;
4891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
4901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#ifdef __KERNEL__
4921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long copy_to_user(void __user *dst, const void *src, unsigned long len)
4931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
4941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mtsp(get_kernel_space(), 1);
4951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mtsp(get_user_space(), 2);
4961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	return pa_memcpy((void __force *)dst, src, len);
4971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
4981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
499888c31fc83ddc7fcd9947cb67c5718b4e3dd5e1bHelge DellerEXPORT_SYMBOL(__copy_from_user);
500888c31fc83ddc7fcd9947cb67c5718b4e3dd5e1bHelge Dellerunsigned long __copy_from_user(void *dst, const void __user *src, unsigned long len)
5011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
5021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mtsp(get_user_space(), 1);
5031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mtsp(get_kernel_space(), 2);
5041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	return pa_memcpy(dst, (void __force *)src, len);
5051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
5061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
5071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len)
5081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
5091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mtsp(get_user_space(), 1);
5101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mtsp(get_user_space(), 2);
5111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	return pa_memcpy((void __force *)dst, (void __force *)src, len);
5121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
5131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
5141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * Kernel-to-kernel memcpy built on pa_memcpy().
 *
 * Both space slots (1 and 2) are loaded with the kernel space.  The
 * residual byte count reported by pa_memcpy() is discarded; the caller
 * always gets dst back.
 */
void *memcpy(void *dst, const void *src, size_t count)
{
	mtsp(get_kernel_space(), 1);
	mtsp(get_kernel_space(), 2);

	(void)pa_memcpy(dst, src, count);

	return dst;
}
5221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * Exports for the copy routines defined above.
 *
 * NOTE(review): copy_from_user is exported here, but the function defined
 * in this part of the file is __copy_from_user (exported separately next
 * to its definition) - confirm copy_from_user is provided elsewhere.
 */
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_in_user);
EXPORT_SYMBOL(memcpy);
527db080f9c530f78dad661257885a1893506077068Helge Deller
528db080f9c530f78dad661257885a1893506077068Helge Dellerlong probe_kernel_read(void *dst, const void *src, size_t size)
529db080f9c530f78dad661257885a1893506077068Helge Deller{
530db080f9c530f78dad661257885a1893506077068Helge Deller	unsigned long addr = (unsigned long)src;
531db080f9c530f78dad661257885a1893506077068Helge Deller
532964f413323e8306ac0acb5e08ccdb5f12418835bHelge Deller	if (addr < PAGE_SIZE)
533db080f9c530f78dad661257885a1893506077068Helge Deller		return -EFAULT;
534db080f9c530f78dad661257885a1893506077068Helge Deller
535db080f9c530f78dad661257885a1893506077068Helge Deller	/* check for I/O space F_EXTEND(0xfff00000) access as well? */
536db080f9c530f78dad661257885a1893506077068Helge Deller
537db080f9c530f78dad661257885a1893506077068Helge Deller	return __probe_kernel_read(dst, src, size);
538db080f9c530f78dad661257885a1893506077068Helge Deller}
539db080f9c530f78dad661257885a1893506077068Helge Deller
5401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#endif
541