11da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/*
21da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * User address space access functions.
31da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * The non-inlined parts of asm-cris/uaccess.h are here.
41da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *
51da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Copyright (C) 2000, Axis Communications AB.
61da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *
71da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Written by Hans-Peter Nilsson.
81da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
91da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds */
101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/uaccess.h>
121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Asm:s have been tweaked (within the domain of correctness) to give
141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   satisfactory results for "gcc version 2.96 20000427 (experimental)".
151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   Check regularly...
171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   Note that the PC saved at a bus-fault is the address *after* the
191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   faulting instruction, which means the branch-target for instructions in
201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   delay-slots for taken branches.  Note also that the postincrement in
211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   the instruction is performed regardless of bus-fault; the register is
221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   seen updated in fault handlers.
231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   Oh, and on the code formatting issue, to whomever feels like "fixing
251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   string.c too.  I just don't think too many people will hack this file
271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   for the code format to be an issue.  */
281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Copy to userspace.  This is based on the memcpy used for
311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   kernel-to-kernel copying; see "string.c".  */
321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/* __copy_user - copy PN bytes from kernel memory at PSRC to the
   user-space buffer at PDST.

   Returns RETN.  RETN starts at 0 and is only modified by the
   __asm_copy_to_user_N helpers and by the fixup code of the movem loop
   below; the fixup's own comment describes it as the count of bytes that
   failed to be copied, so 0 means no fault was recorded.

   NOTE(review): the __asm_copy_to_user_N helpers are not defined in this
   file (presumably asm/uaccess.h).  That they advance dst and src and add
   to retn on a fault is assumed from how they are used here -- confirm.  */
331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long
341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* We want the parameters put in special registers.
371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     Make sure the compiler is able to make something useful of this.
381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     FIXME: Comment for old gcc version.  Check.
4149b4ff3304b52b18c490fc4deb400b61bb7ed142Simon Arlott     If gcc was alright, it really would need no temporaries, and no
421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     stack space to save stuff on. */
431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
  /* Working copies of the arguments, pinned to the registers that the
     movem asm further down checks for: dst=r13, src=r11, n=r12, retn=r10.
     Note that n is a signed int although pn is unsigned long.  */
441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register char *dst __asm__ ("r13") = pdst;
451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register const char *src __asm__ ("r11") = psrc;
461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register int n __asm__ ("r12") = pn;
471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register int retn __asm__ ("r10") = 0;
481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* When src is aligned but not dst, this makes a few extra needless
511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     cycles.  I believe it would take as many to check that the
521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     re-alignment was unnecessary.  */
531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  if (((unsigned long) dst & 3) != 0
541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      /* Don't align if we wouldn't copy more than a few bytes; so we
551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	 don't have to check further for overflows.  */
561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      && n >= 3)
571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
    /* Copy one byte if dst is odd, then two bytes if bit 1 of dst is
       set.  The second test re-reads dst, so it relies on the one-byte
       helper having advanced dst (assumed -- see the header note).  At
       most 3 bytes are consumed here, which the n >= 3 guard covers.  */
581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if ((unsigned long) dst & 1)
591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    {
601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_to_user_1 (dst, src, retn);
611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      n--;
621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    }
631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if ((unsigned long) dst & 2)
651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    {
661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_to_user_2 (dst, src, retn);
671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      n -= 2;
681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    }
691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* Decide which copying method to use. */
721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  if (n >= 44*2)		/* Break even between movem and
731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				   move16 is at 38.7*2, but modulo 44. */
741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* For large copies we use 'movem'.  */
761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* It is not optimal to tell the compiler about clobbering any
781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       registers; that will move the saving/restoring of those registers
791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       to the function prologue/epilogue, and make non-movem sizes
801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       suboptimal.
811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       This method is not foolproof; it assumes that the "asm reg"
831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       declarations at the beginning of the function really are used
841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       here (beware: they may be moved to temporary registers).
851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       This way, we do not have to save/move the registers around into
861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       temporaries; we can safely use them straight away.
871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       If you want to check that the allocation was right; then
891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       check the equalities in the first comment.  It should say
901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       "r13=r13, r11=r11, r12=r12".  */
    /* Layout of the asm below:
       - The .ifnc/.err guard makes assembly fail unless the four operands
	 (dst, src, n, retn) were allocated to r13, r11, r12 and r10.
       - 11*4 bytes of stack are reserved and the registers used by the
	 movem loop are saved there, then restored after the loop.
       - The loop at label 0 loads 44 bytes from [r11+] and stores them to
	 [r13+].  n (r12) is decremented by 44 once before the loop and
	 once per pass, and 44 is added back after the loop.
       - __ex_table pairs 0b with fixup 3 and 1b with fixup 4.  Fixup 3
	 adds 44 to the word at [$sp] and re-enters the loop at 0.  Fixup 4
	 restores the registers, adds 44 to r10 (retn) and to r12 (n), and
	 resumes at label 2, i.e. after the asm's normal epilogue.  */
911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    __asm__ volatile ("\
921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.err								\n\
941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.endif								\n\
952b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
962b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Save the registers we'll use in the movem process		\n\
972b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; on the stack.						\n\
982b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq	11*4,$sp						\n\
992b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	$r10,[$sp]						\n\
1002b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1012b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Now we've got this:						\n\
1022b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r11 - src							\n\
1032b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r13 - dst							\n\
1042b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r12 - n							\n\
1052b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1062b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Update n for the first loop					\n\
1072b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq	44,$r12							\n\
1082b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1092b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\
1102b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; branch, is that of the branch target, we actually point at the from-movem \n\
1112b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; for this case.  There is no ambiguity here; if there was a fault in that \n\
1122b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; instruction (meaning a kernel oops), the faulted PC would be the address \n\
1132b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; after *that* movem.							\n\
1142b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1152b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson0:									\n\
1162b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	[$r11+],$r10						\n\
1172b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq   44,$r12							\n\
1182b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	bge	0b							\n\
1192b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	$r10,[$r13+]						\n\
1202b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson1:									\n\
1212b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq   44,$r12  ;; compensate for last loop underflowing n	\n\
1222b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1232b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Restore registers from stack					\n\
1242b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem [$sp+],$r10						\n\
1252b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson2:									\n\
1262b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.section .fixup,\"ax\"						\n\
1272b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1282b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; To provide a correct count in r10 of bytes that failed to be copied,	\n\
1292b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; we jump back into the loop if the loop-branch was taken.  There is no	\n\
1302b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; performance penalty for sany use; the program will segfault soon enough.\n\
1312b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1322b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson3:									\n\
1332b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	move.d [$sp],$r10						\n\
1342b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq 44,$r10							\n\
1352b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	move.d $r10,[$sp]						\n\
1362b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	jump 0b								\n\
1372b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson4:									\n\
1382b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem [$sp+],$r10						\n\
1392b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq 44,$r10							\n\
1402b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq 44,$r12							\n\
1412b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	jump 2b								\n\
1422b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
1432b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.previous							\n\
1442b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.section __ex_table,\"a\"					\n\
1452b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.dword 0b,3b							\n\
1462b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.dword 1b,4b							\n\
1471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.previous"
1481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
1501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
1511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
1531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* Either we directly start copying, using dword copying in a loop, or
1551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     we copy as much as possible with 'movem' and then the last block (<44
1561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     bytes) is copied here.  This will work since 'movem' will have
1571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     updated SRC, DST and N.  */
1581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  while (n >= 16)
1601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
1611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    __asm_copy_to_user_16 (dst, src, retn);
1621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    n -= 16;
1631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
1641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* Having a separate by-four loop cuts down on cache footprint.
1661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     FIXME:  Test with and without; increasing switch to be 0..15.  */
1671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  while (n >= 4)
1681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
1691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    __asm_copy_to_user_4 (dst, src, retn);
1701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    n -= 4;
1711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
1721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
  /* Copy the remaining tail.  There is no default case: any value of n
     other than 0..3 copies nothing further.  Unlike
     __copy_user_zeroing, this function never tests retn between steps;
     copying simply continues after a recorded fault.  */
1731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  switch (n)
1741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
1751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 0:
1761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
1771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 1:
1781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_to_user_1 (dst, src, retn);
1791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
1801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 2:
1811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_to_user_2 (dst, src, retn);
1821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
1831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 3:
1841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_to_user_3 (dst, src, retn);
1851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
1861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
1871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  return retn;
1891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
1901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Copy from user to kernel, zeroing the bytes that were inaccessible in
1921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   userland.  The return-value is the number of bytes that were
1931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds   inaccessible.  */
1941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/* __copy_user_zeroing - copy PN bytes from the user-space buffer at PSRC
   to kernel memory at PDST.

   Returns RETN on the fault-free path (RETN starts at 0).  Once a fault
   has been recorded in RETN by the alignment step or the by-four loop,
   control goes to copy_exception_bytes, which zeroes the N bytes still
   outstanding at DST and returns RETN + N.

   NOTE(review): the __asm_copy_from_user_N helpers are not defined in
   this file (presumably asm/uaccess.h).  That they advance dst/src, add
   to retn on a fault and clear the bytes they failed to read is assumed
   from the comments and usage here -- confirm.  */
1951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long
19607f2402b4adbcd0e6822ddc27953b63d4504faecJesper Nilsson__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
1971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
1981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* We want the parameters put in special registers.
1991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     Make sure the compiler is able to make something useful of this.
2001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
2011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     FIXME: Comment for old gcc version.  Check.
20349b4ff3304b52b18c490fc4deb400b61bb7ed142Simon Arlott     If gcc was alright, it really would need no temporaries, and no
2041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     stack space to save stuff on.  */
2051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
  /* Working copies of the arguments, pinned to the registers that the
     movem asm further down checks for: dst=r13, src=r11, n=r12, retn=r10.
     Note that n is a signed int although pn is unsigned long.  */
2061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register char *dst __asm__ ("r13") = pdst;
2071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register const char *src __asm__ ("r11") = psrc;
2081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register int n __asm__ ("r12") = pn;
2091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register int retn __asm__ ("r10") = 0;
2101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* The best reason to align src is that we then know that a read-fault
2121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     was for aligned bytes; there's no 1..3 remaining good bytes to
2131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     pickle.  */
2141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  if (((unsigned long) src & 3) != 0)
2151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
    /* Unlike __copy_user, there is no minimum-length guard on the whole
       alignment step; each sub-step checks that enough bytes remain.  */
2161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if (((unsigned long) src & 1) && n != 0)
2171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    {
2181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_from_user_1 (dst, src, retn);
2191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      n--;
2201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    }
2211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if (((unsigned long) src & 2) && n >= 2)
2231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    {
2241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_from_user_2 (dst, src, retn);
2251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      n -= 2;
2261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    }
2271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* We only need one check after the unalignment-adjustments, because
2291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       if both adjustments were done, either both or neither reference
2301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       had an exception.  */
2311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if (retn != 0)
2321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      goto copy_exception_bytes;
2331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
2341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* Decide which copying method to use. */
2361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  if (n >= 44*2)		/* Break even between movem and
2371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				   move16 is at 38.7*2, but modulo 44.
2381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				   FIXME: We use move4 now.  */
2391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
2401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* For large copies we use 'movem' */
2411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* It is not optimal to tell the compiler about clobbering any
2431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       registers; that will move the saving/restoring of those registers
2441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       to the function prologue/epilogue, and make non-movem sizes
2451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       suboptimal.
2461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       This method is not foolproof; it assumes that the "asm reg"
2481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       declarations at the beginning of the function really are used
2491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       here (beware: they may be moved to temporary registers).
2501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       This way, we do not have to save/move the registers around into
2511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       temporaries; we can safely use them straight away.
2521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       If you want to check that the allocation was right; then
2541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       check the equalities in the first comment.  It should say
2551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       "r13=r13, r11=r11, r12=r12" */
    /* Layout of the asm below:
       - The .ifnc/.err guard makes assembly fail unless the four operands
	 (dst, src, n, retn) were allocated to r13, r11, r12 and r10.
       - 11*4 bytes of stack are reserved and the registers used by the
	 movem loop are saved there, then restored after the loop.
       - The loop at label 0 loads 44 bytes from [r11+] and stores them to
	 [r13+].  n (r12) is decremented by 44 once before the loop and
	 once per pass, and 44 is added back after the loop.
       - The only __ex_table entry pairs label 1 (the address right after
	 the loading movem) with fixup 3.  Fixup 3 restores the registers,
	 adds 44 back to r12 (n), subtracts 44 from r11 (src) and jumps to
	 label 4 at the end of the asm, so the faulting 44-byte line is
	 retried by the smaller copies that follow.  */
2562b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson    __asm__ volatile ("\n\
2571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
2581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.err								\n\
2591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.endif								\n\
2602b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
2612b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Save the registers we'll use in the movem process		\n\
2622b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; on the stack.						\n\
2632b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq	11*4,$sp						\n\
2642b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	$r10,[$sp]						\n\
2652b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
2662b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Now we've got this:						\n\
2672b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r11 - src							\n\
2682b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r13 - dst							\n\
2692b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r12 - n							\n\
2702b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
2712b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Update n for the first loop					\n\
2722b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq	44,$r12							\n\
2732b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson0:									\n\
2742b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	[$r11+],$r10						\n\
2752b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson1:									\n\
2762b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq   44,$r12							\n\
2772b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	bge	0b							\n\
2782b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	$r10,[$r13+]						\n\
2792b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
2802b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq   44,$r12  ;; compensate for last loop underflowing n	\n\
2812b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
2822b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Restore registers from stack					\n\
2832b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem [$sp+],$r10						\n\
2842b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson4:									\n\
2852b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.section .fixup,\"ax\"						\n\
2862b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
2872b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
2882b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; page fault somewhere on the line.  Without checking for page limits,	\n\
2892b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; we don't know where, but we need to copy accurately and keep an	\n\
2902b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; accurate count; not just clear the whole line.  To do that, we fall	\n\
2912b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; down in the code below, proceeding with smaller amounts.  It should	\n\
2922b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; be kept in mind that we have to cater to code like what at one time	\n\
2932b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; was in fs/super.c:							\n\
2942b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;;  i = size - copy_from_user((void *)page, data, size);		\n\
2952b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; which would cause repeated faults while clearing the remainder of	\n\
2962b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; the SIZE bytes at PAGE after the first fault.			\n\
2972b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; A caveat here is that we must not fall through from a failing page	\n\
2982b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; to a valid page.							\n\
2992b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
3002b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson3:									\n\
3012b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem  [$sp+],$r10						\n\
3022b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq	44,$r12 ;; Get back count before faulting point.	\n\
3032b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq	44,$r11 ;; Get back pointer to faulting movem-line.	\n\
3042b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	jump	4b	;; Fall through, pretending the fault didn't happen.\n\
3052b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
3062b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.previous							\n\
3072b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.section __ex_table,\"a\"					\n\
3082b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.dword 1b,3b							\n\
3091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.previous"
3101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
3121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
3131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
3151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* Either we directly start copying here, using dword copying in a loop,
3171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     or we copy as much as possible with 'movem' and then the last block
3181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     (<44 bytes) is copied here.  This will work since 'movem' will have
3191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     updated src, dst and n.  (Except with failing src.)
3201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     Since we want to keep src accurate, we can't use
3221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
3231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     retn, but not src (by design; its value is ignored elsewhere).  */
3241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  while (n >= 4)
3261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
3271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    __asm_copy_from_user_4 (dst, src, retn);
3281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    n -= 4;
3291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
    /* n has already been reduced for this dword, so at
       copy_exception_bytes it counts only the bytes after it; the
       dword itself is accounted for in retn.  */
3301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if (retn)
3311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      goto copy_exception_bytes;
3321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
3331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* If we get here, there were no memory read faults.  */
3351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  switch (n)
3361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
3371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* These copies are at least "naturally aligned" (so we don't have
3381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       to check each byte), due to the src alignment code before the
3391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       movem loop.  The *_3 case *will* get the correct count for retn.  */
3401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 0:
3411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      /* This case deliberately left in (if you have doubts check the
3421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	 generated assembly code).  */
3431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
3441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 1:
3451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_from_user_1 (dst, src, retn);
3461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
3471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 2:
3481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_from_user_2 (dst, src, retn);
3491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
3501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 3:
3511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_copy_from_user_3 (dst, src, retn);
3521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
3531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
3541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* If we get here, retn correctly reflects the number of failing
3561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     bytes.  */
3571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  return retn;
3581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldscopy_exception_bytes:
3601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* We already have "retn" bytes cleared, and need to clear the
3611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
3621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     memset is preferred here, since this isn't speed-critical code and
3631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     we'd rather have this a leaf-function than calling memset.  */
3641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
3651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    char *endp;
3661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    for (endp = dst + n; dst < endp; dst++)
3671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      *dst = 0;
3681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
3691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
  /* Bytes already counted as failed plus the bytes just zeroed without
     being attempted.  */
3701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  return retn + n;
3711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
3721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Zero userspace.  */
3741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/* __do_clear_user -- zero "pn" bytes of user memory starting at "pto".

   The work is staged: optional 1- and 2-byte stores to bring the
   destination to a 4-byte boundary, a movem loop that stores 12*4 bytes
   per iteration when at least 48 bytes remain, then 16-byte and 4-byte
   loops, and finally a 0..3 byte tail.

   Returns "retn".  It starts at 0, is handed to every __asm_clear_*
   macro, and has 12*4 added by the fault fixups of the movem loop.
   NOTE(review): presumably this is the number of bytes that could not be
   cleared; the __asm_clear_* macros are defined outside this part of the
   file -- confirm there.  */
3751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long
3761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds__do_clear_user (void __user *pto, unsigned long pn)
3771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
3781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* We want the parameters put in special registers.
3791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     Make sure the compiler is able to make something useful of this.
3801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
3811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     FIXME: Comment for old gcc version.  Check.
38349b4ff3304b52b18c490fc4deb400b61bb7ed142Simon Arlott     If gcc was alright, it really would need no temporaries, and no
3841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     stack space to save stuff on. */
  /* NOTE(review): the register mapping quoted above mentions r11, but the
     locals below are bound to r13 (dst), r12 (n) and r10 (retn), and this
     function takes only two arguments.  The mapping appears to describe a
     three-argument routine -- confirm and update.  */
3851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register char *dst __asm__ ("r13") = pto;
3871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register int n __asm__ ("r12") = pn;
3881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  register int retn __asm__ ("r10") = 0;
  /* NOTE(review): pn (unsigned long) is narrowed to a signed int here.  A
     value that ends up negative would fail every "n >= ..." test below and
     match no switch case, so the function would return 0 without storing
     anything -- confirm that callers bound the size.  */
3891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
3901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
  /* Align dst to a 4-byte boundary: one byte if the address is odd, then
     two bytes if bit 1 is set.  At most 3 bytes are consumed, which the
     "n >= 3" guard covers.  The second test re-reads dst, so
     __asm_clear_1 is presumably expected to advance it -- confirm in the
     macro definition.  */
3911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  if (((unsigned long) dst & 3) != 0
3921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Don't align if we wouldn't clear more than a few bytes.  */
3931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      && n >= 3)
3941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
3951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if ((unsigned long) dst & 1)
3961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    {
3971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_clear_1 (dst, retn);
3981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      n--;
3991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    }
4001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    if ((unsigned long) dst & 2)
4021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    {
4031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_clear_2 (dst, retn);
4041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      n -= 2;
4051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    }
4061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
4071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* Decide which clearing method to use.
4091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     FIXME: This number is from the "ordinary" kernel memset.  */
4101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  if (n >= (1*48))
4111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
4121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* For large clears we use 'movem' */
4131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    /* It is not optimal to tell the compiler about clobbering any
4151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       call-saved registers; that will move the saving/restoring of
4161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       those registers to the function prologue/epilogue, and make
4171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       non-movem sizes suboptimal.
4181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       This method is not foolproof; it assumes that the "asm reg"
4201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       declarations at the beginning of the function really are used
4211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       here (beware: they may be moved to temporary registers).
4221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       This way, we do not have to save/move the registers around into
4231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds       temporaries; we can safely use them straight away.
4241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      If you want to check that the allocation was right; then
4261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      check the equalities in the first comment.  It should say
4271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      something like "r13=r13, r12=r12, r10=r10". */
    /* Layout of the asm statement below:
       - The .ifnc/.err guard makes assembly fail unless operands %0, %1
         and %2 were allocated to r13, r12 and r10 respectively.
       - r0..r10 are saved on the stack and restored afterwards; r11 is
         zeroed but never restored, hence the "r11" clobber.
       - r0..r11 are cleared and then written out with "movem $r11" in
         12*4-byte steps, post-incrementing r13; r12 is decremented by
         12*4 per step and re-adjusted by the final addq.
       - The movem follows "bge 0b".  Per the delay-slot note at the top
         of this file, a fault in it is reported at 0b when the branch is
         taken and at 1b when it is not, which is why __ex_table carries
         one entry for each (fixups 3 and 4).
       - Fixup 3 adds 12*4 to the word at [$sp], re-clears r10 (used as
         scratch) and resumes the loop at 0.  Fixup 4 restores the saved
         registers, adds 12*4 to both r10 and r12 and continues at
         label 2.  NOTE(review): [$sp] is presumably the slot holding the
         saved r10 (retn) -- confirm against the CRIS movem store
         order.  */
4282b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson    __asm__ volatile ("\n\
4291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.ifnc %0%1%2,$r13$r12$r10					\n\
4301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.err								\n\
4311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.endif								\n\
4322b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
4332b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Save the registers we'll clobber in the movem process	\n\
4342b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; on the stack.  Don't mention them to gcc, it will only be	\n\
4352b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; upset.							\n\
4362b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq	11*4,$sp						\n\
4372b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	$r10,[$sp]						\n\
4382b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
4392b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r0							\n\
4402b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r1							\n\
4412b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r2							\n\
4422b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r3							\n\
4432b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r4							\n\
4442b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r5							\n\
4452b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r6							\n\
4462b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r7							\n\
4472b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r8							\n\
4482b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r9							\n\
4492b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r10							\n\
4502b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r11							\n\
4512b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
4522b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Now we've got this:						\n\
4532b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r13 - dst							\n\
4542b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; r12 - n							\n\
4552b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
4562b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Update n for the first loop					\n\
4572b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq	12*4,$r12						\n\
4582b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson0:									\n\
4592b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	subq   12*4,$r12						\n\
4602b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	bge	0b							\n\
4612b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem	$r11,[$r13+]						\n\
4622b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson1:									\n\
4632b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq   12*4,$r12        ;; compensate for last loop underflowing n\n\
4642b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
4652b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	;; Restore registers from stack					\n\
4662b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem [$sp+],$r10						\n\
4672b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson2:									\n\
4682b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.section .fixup,\"ax\"						\n\
4692b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson3:									\n\
4702b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	move.d [$sp],$r10						\n\
4712b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq 12*4,$r10							\n\
4722b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	move.d $r10,[$sp]						\n\
4732b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	clear.d $r10							\n\
4742b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	jump 0b								\n\
4752b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
4762b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson4:									\n\
4772b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	movem [$sp+],$r10						\n\
4782b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq 12*4,$r10							\n\
4792b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	addq 12*4,$r12							\n\
4802b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	jump 2b								\n\
4812b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson									\n\
4822b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.previous							\n\
4832b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.section __ex_table,\"a\"					\n\
4842b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.dword 0b,3b							\n\
4852b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson	.dword 1b,4b							\n\
4861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.previous"
4871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
4891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
4901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     /* Clobber */ : "r11");
4911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
4921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
  /* Clear what is left in 16-byte steps.  The loop only adjusts n;
     __asm_clear_16 is presumably responsible for advancing dst and
     updating retn on a fault -- confirm in the macro definition.  */
4931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  while (n >= 16)
4941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
4951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    __asm_clear_16 (dst, retn);
4961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    n -= 16;
4971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
4981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
4991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  /* Having a separate by-four loop cuts down on cache footprint.
5001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds     FIXME:  Test with and without; increasing switch to be 0..15.  */
5011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  while (n >= 4)
5021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
5031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    __asm_clear_4 (dst, retn);
5041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    n -= 4;
5051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
5061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
  /* Tail: n is below 4 after the loop above, so at most one of the
     1-, 2- or 3-byte clears runs.  */
5071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  switch (n)
5081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  {
5091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 0:
5101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
5111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 1:
5121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_clear_1 (dst, retn);
5131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
5141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 2:
5151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_clear_2 (dst, retn);
5161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
5171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds    case 3:
5181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      __asm_clear_3 (dst, retn);
5191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds      break;
5201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  }
5211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
5221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds  return retn;
5231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
524