/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <asm/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 2.96 20000427 (experimental)".

   Check regularly...

   Note that the PC saved at a bus-fault is the address *after* the
   faulting instruction, which means the branch-target for instructions in
   delay-slots for taken branches.  Note also that the postincrement in
   the instruction is performed regardless of bus-fault; the register is
   seen updated in fault handlers.
231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds Oh, and on the code formatting issue, to whomever feels like "fixing 251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix" 261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds string.c too. I just don't think too many people will hack this file 271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds for the code format to be an issue. */ 281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Copy to userspace. This is based on the memcpy used for 311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds kernel-to-kernel copying; see "string.c". */ 321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long 341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds__copy_user (void __user *pdst, const void *psrc, unsigned long pn) 351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{ 361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* We want the parameters put in special registers. 371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds Make sure the compiler is able to make something useful of this. 381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). 391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds FIXME: Comment for old gcc version. Check. 4149b4ff3304b52b18c490fc4deb400b61bb7ed142Simon Arlott If gcc was alright, it really would need no temporaries, and no 421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds stack space to save stuff on. 
*/ 431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register char *dst __asm__ ("r13") = pdst; 451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register const char *src __asm__ ("r11") = psrc; 461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register int n __asm__ ("r12") = pn; 471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register int retn __asm__ ("r10") = 0; 481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* When src is aligned but not dst, this makes a few extra needless 511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds cycles. I believe it would take as many to check that the 521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds re-alignment was unnecessary. */ 531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (((unsigned long) dst & 3) != 0 541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Don't align if we wouldn't copy more than a few bytes; so we 551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds don't have to check further for overflows. 
*/ 561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds && n >= 3) 571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if ((unsigned long) dst & 1) 591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_to_user_1 (dst, src, retn); 611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n--; 621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if ((unsigned long) dst & 2) 651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_to_user_2 (dst, src, retn); 671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 2; 681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Decide which copying method to use. */ 721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (n >= 44*2) /* Break even between movem and 731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds move16 is at 38.7*2, but modulo 44. */ 741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* For large copies we use 'movem'. */ 761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* It is not optimal to tell the compiler about clobbering any 781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds registers; that will move the saving/restoring of those registers 791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds to the function prologue/epilogue, and make non-movem sizes 801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds suboptimal. 
811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds This method is not foolproof; it assumes that the "asm reg" 831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds declarations at the beginning of the function really are used 841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds here (beware: they may be moved to temporary registers). 851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds This way, we do not have to save/move the registers around into 861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds temporaries; we can safely use them straight away. 871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds If you want to check that the allocation was right; then 891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds check the equalities in the first comment. It should say 901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds "r13=r13, r11=r11, r12=r12". */ 911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm__ volatile ("\ 921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ 931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .err \n\ 941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .endif \n\ 952b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 962b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Save the registers we'll use in the movem process \n\ 972b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; on the stack. 
\n\ 982b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 11*4,$sp \n\ 992b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem $r10,[$sp] \n\ 1002b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1012b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Now we've got this: \n\ 1022b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r11 - src \n\ 1032b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r13 - dst \n\ 1042b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r12 - n \n\ 1052b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1062b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Update n for the first loop \n\ 1072b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 44,$r12 \n\ 1082b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1092b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\ 1102b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; branch, is that of the branch target, we actually point at the from-movem \n\ 1112b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; for this case. There is no ambiguity here; if there was a fault in that \n\ 1122b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; instruction (meaning a kernel oops), the faulted PC would be the address \n\ 1132b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; after *that* movem. 
\n\ 1142b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1152b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson0: \n\ 1162b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$r11+],$r10 \n\ 1172b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 44,$r12 \n\ 1182b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson bge 0b \n\ 1192b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem $r10,[$r13+] \n\ 1202b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson1: \n\ 1212b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 44,$r12 ;; compensate for last loop underflowing n \n\ 1222b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1232b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Restore registers from stack \n\ 1242b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$sp+],$r10 \n\ 1252b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson2: \n\ 1262b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .section .fixup,\"ax\" \n\ 1272b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1282b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; To provide a correct count in r10 of bytes that failed to be copied, \n\ 1292b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; we jump back into the loop if the loop-branch was taken. 
There is no \n\ 1302b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson; performance penalty for sany use; the program will segfault soon enough.\n\ 1312b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1322b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson3: \n\ 1332b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson move.d [$sp],$r10 \n\ 1342b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 44,$r10 \n\ 1352b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson move.d $r10,[$sp] \n\ 1362b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson jump 0b \n\ 1372b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson4: \n\ 1382b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$sp+],$r10 \n\ 1392b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 44,$r10 \n\ 1402b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 44,$r12 \n\ 1412b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson jump 2b \n\ 1422b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 1432b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .previous \n\ 1442b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .section __ex_table,\"a\" \n\ 1452b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .dword 0b,3b \n\ 1462b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .dword 1b,4b \n\ 1471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .previous" 1481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) 1501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); 1511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 1531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Either we directly start copying, using dword copying in a loop, or 
1551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds we copy as much as possible with 'movem' and then the last block (<44 1561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds bytes) is copied here. This will work since 'movem' will have 1571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds updated SRC, DST and N. */ 1581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds while (n >= 16) 1601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 1611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_to_user_16 (dst, src, retn); 1621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 16; 1631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 1641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Having a separate by-four loops cuts down on cache footprint. 1661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds FIXME: Test with and without; increasing switch to be 0..15. 
*/ 1671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds while (n >= 4) 1681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 1691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_to_user_4 (dst, src, retn); 1701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 4; 1711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 1721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds switch (n) 1741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 1751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 0: 1761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 1771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 1: 1781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_to_user_1 (dst, src, retn); 1791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 1801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 2: 1811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_to_user_2 (dst, src, retn); 1821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 1831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 3: 1841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_to_user_3 (dst, src, retn); 1851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 1861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 1871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds return retn; 1891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds} 1901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Copy from user to kernel, zeroing the bytes that were inaccessible in 1921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds userland. The return-value is the number of bytes that were 1931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds inaccessible. 
*/ 1941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 1951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long 19607f2402b4adbcd0e6822ddc27953b63d4504faecJesper Nilsson__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) 1971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{ 1981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* We want the parameters put in special registers. 1991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds Make sure the compiler is able to make something useful of this. 2001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). 2011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds FIXME: Comment for old gcc version. Check. 20349b4ff3304b52b18c490fc4deb400b61bb7ed142Simon Arlott If gcc was alright, it really would need no temporaries, and no 2041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds stack space to save stuff on. */ 2051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register char *dst __asm__ ("r13") = pdst; 2071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register const char *src __asm__ ("r11") = psrc; 2081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register int n __asm__ ("r12") = pn; 2091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register int retn __asm__ ("r10") = 0; 2101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* The best reason to align src is that we then know that a read-fault 2121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds was for aligned bytes; there's no 1..3 remaining good bytes to 2131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds pickle. 
*/ 2141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (((unsigned long) src & 3) != 0) 2151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 2161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (((unsigned long) src & 1) && n != 0) 2171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 2181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_from_user_1 (dst, src, retn); 2191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n--; 2201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 2211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (((unsigned long) src & 2) && n >= 2) 2231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 2241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_from_user_2 (dst, src, retn); 2251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 2; 2261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 2271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* We only need one check after the unalignment-adjustments, because 2291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if both adjustments were done, either both or neither reference 2301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds had an exception. */ 2311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (retn != 0) 2321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds goto copy_exception_bytes; 2331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 2341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Decide which copying method to use. */ 2361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (n >= 44*2) /* Break even between movem and 2371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds move16 is at 38.7*2, but modulo 44. 2381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds FIXME: We use move4 now. 
*/ 2391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 2401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* For large copies we use 'movem' */ 2411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* It is not optimal to tell the compiler about clobbering any 2431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds registers; that will move the saving/restoring of those registers 2441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds to the function prologue/epilogue, and make non-movem sizes 2451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds suboptimal. 2461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds This method is not foolproof; it assumes that the "asm reg" 2481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds declarations at the beginning of the function really are used 2491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds here (beware: they may be moved to temporary registers). 2501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds This way, we do not have to save/move the registers around into 2511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds temporaries; we can safely use them straight away. 2521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 2531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds If you want to check that the allocation was right; then 2541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds check the equalities in the first comment. 
It should say 2551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds "r13=r13, r11=r11, r12=r12" */ 2562b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson __asm__ volatile ("\n\ 2571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ 2581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .err \n\ 2591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .endif \n\ 2602b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 2612b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Save the registers we'll use in the movem process \n\ 2622b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; on the stack. \n\ 2632b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 11*4,$sp \n\ 2642b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem $r10,[$sp] \n\ 2652b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 2662b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Now we've got this: \n\ 2672b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r11 - src \n\ 2682b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r13 - dst \n\ 2692b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r12 - n \n\ 2702b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 2712b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Update n for the first loop \n\ 2722b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 44,$r12 \n\ 2732b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson0: \n\ 2742b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$r11+],$r10 \n\ 2752b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson1: \n\ 2762b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 44,$r12 \n\ 2772b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson bge 0b \n\ 2782b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem $r10,[$r13+] \n\ 2792b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 2802b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 44,$r12 ;; 
compensate for last loop underflowing n \n\ 2812b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 2822b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Restore registers from stack \n\ 2832b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$sp+],$r10 \n\ 2842b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson4: \n\ 2852b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .section .fixup,\"ax\" \n\ 2862b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 2872b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; Do not jump back into the loop if we fail. For some uses, we get a \n\ 2882b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; page fault somewhere on the line. Without checking for page limits, \n\ 2892b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; we don't know where, but we need to copy accurately and keep an \n\ 2902b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; accurate count; not just clear the whole line. To do that, we fall \n\ 2912b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; down in the code below, proceeding with smaller amounts. It should \n\ 2922b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; be kept in mind that we have to cater to code like what at one time \n\ 2932b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; was in fs/super.c: \n\ 2942b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; i = size - copy_from_user((void *)page, data, size); \n\ 2952b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; which would cause repeated faults while clearing the remainder of \n\ 2962b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; the SIZE bytes at PAGE after the first fault. \n\ 2972b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; A caveat here is that we must not fall through from a failing page \n\ 2982b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson;; to a valid page. 
\n\ 2992b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 3002b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson3: \n\ 3012b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$sp+],$r10 \n\ 3022b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 44,$r12 ;; Get back count before faulting point. \n\ 3032b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 44,$r11 ;; Get back pointer to faulting movem-line. \n\ 3042b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson jump 4b ;; Fall through, pretending the fault didn't happen.\n\ 3052b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 3062b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .previous \n\ 3072b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .section __ex_table,\"a\" \n\ 3082b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .dword 1b,3b \n\ 3091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .previous" 3101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) 3121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); 3131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 3151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Either we directly start copying here, using dword copying in a loop, 3171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds or we copy as much as possible with 'movem' and then the last block 3181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds (<44 bytes) is copied here. This will work since 'movem' will have 3191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds updated src, dst and n. (Except with failing src.) 
3201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds Since we want to keep src accurate, we can't use 3221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and 3231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds retn, but not src (by design; it's value is ignored elsewhere). */ 3241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds while (n >= 4) 3261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 3271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_from_user_4 (dst, src, retn); 3281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 4; 3291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (retn) 3311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds goto copy_exception_bytes; 3321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 3331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* If we get here, there were no memory read faults. */ 3351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds switch (n) 3361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 3371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* These copies are at least "naturally aligned" (so we don't have 3381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds to check each byte), due to the src alignment code before the 3391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds movem loop. The *_3 case *will* get the correct count for retn. */ 3401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 0: 3411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* This case deliberately left in (if you have doubts check the 3421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds generated assembly code). 
*/ 3431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 3441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 1: 3451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_from_user_1 (dst, src, retn); 3461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 3471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 2: 3481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_from_user_2 (dst, src, retn); 3491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 3501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 3: 3511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_copy_from_user_3 (dst, src, retn); 3521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 3531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 3541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* If we get here, retn correctly reflects the number of failing 3561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds bytes. */ 3571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds return retn; 3581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldscopy_exception_bytes: 3601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* We already have "retn" bytes cleared, and need to clear the 3611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds remaining "n" bytes. A non-optimized simple byte-for-byte in-line 3621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds memset is preferred here, since this isn't speed-critical code and 3631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds we'd rather have this a leaf-function than calling memset. 
*/ 3641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 3651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds char *endp; 3661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds for (endp = dst + n; dst < endp; dst++) 3671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *dst = 0; 3681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 3691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds return retn + n; 3711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds} 3721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/* Zero userspace. */ 3741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsunsigned long 3761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds__do_clear_user (void __user *pto, unsigned long pn) 3771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{ 3781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* We want the parameters put in special registers. 3791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds Make sure the compiler is able to make something useful of this. 3801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). 3811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds FIXME: Comment for old gcc version. Check. 38349b4ff3304b52b18c490fc4deb400b61bb7ed142Simon Arlott If gcc was alright, it really would need no temporaries, and no 3841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds stack space to save stuff on. 
*/ 3851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register char *dst __asm__ ("r13") = pto; 3871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register int n __asm__ ("r12") = pn; 3881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds register int retn __asm__ ("r10") = 0; 3891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 3911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (((unsigned long) dst & 3) != 0 3921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Don't align if we wouldn't copy more than a few bytes. */ 3931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds && n >= 3) 3941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 3951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if ((unsigned long) dst & 1) 3961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 3971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_clear_1 (dst, retn); 3981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n--; 3991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 4001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if ((unsigned long) dst & 2) 4021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 4031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_clear_2 (dst, retn); 4041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 2; 4051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 4061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 4071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Decide which copying method to use. 4091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds FIXME: This number is from the "ordinary" kernel memset. 
*/ 4101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds if (n >= (1*48)) 4111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 4121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* For large clears we use 'movem' */ 4131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* It is not optimal to tell the compiler about clobbering any 4151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds call-saved registers; that will move the saving/restoring of 4161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds those registers to the function prologue/epilogue, and make 4171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds non-movem sizes suboptimal. 4181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds This method is not foolproof; it assumes that the "asm reg" 4201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds declarations at the beginning of the function really are used 4211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds here (beware: they may be moved to temporary registers). 4221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds This way, we do not have to save/move the registers around into 4231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds temporaries; we can safely use them straight away. 4241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds If you want to check that the allocation was right; then 4261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds check the equalities in the first comment. It should say 4271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds something like "r13=r13, r11=r11, r12=r12". 
*/ 4282b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson __asm__ volatile ("\n\ 4291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .ifnc %0%1%2,$r13$r12$r10 \n\ 4301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .err \n\ 4311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .endif \n\ 4322b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 4332b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Save the registers we'll clobber in the movem process \n\ 4342b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; on the stack. Don't mention them to gcc, it will only be \n\ 4352b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; upset. \n\ 4362b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 11*4,$sp \n\ 4372b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem $r10,[$sp] \n\ 4382b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 4392b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r0 \n\ 4402b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r1 \n\ 4412b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r2 \n\ 4422b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r3 \n\ 4432b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r4 \n\ 4442b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r5 \n\ 4452b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r6 \n\ 4462b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r7 \n\ 4472b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r8 \n\ 4482b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r9 \n\ 4492b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r10 \n\ 4502b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r11 \n\ 4512b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 4522b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Now we've got this: \n\ 4532b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r13 
- dst \n\ 4542b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; r12 - n \n\ 4552b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 4562b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Update n for the first loop \n\ 4572b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 12*4,$r12 \n\ 4582b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson0: \n\ 4592b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson subq 12*4,$r12 \n\ 4602b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson bge 0b \n\ 4612b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem $r11,[$r13+] \n\ 4622b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson1: \n\ 4632b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 12*4,$r12 ;; compensate for last loop underflowing n\n\ 4642b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 4652b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson ;; Restore registers from stack \n\ 4662b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$sp+],$r10 \n\ 4672b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson2: \n\ 4682b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .section .fixup,\"ax\" \n\ 4692b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson3: \n\ 4702b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson move.d [$sp],$r10 \n\ 4712b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 12*4,$r10 \n\ 4722b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson move.d $r10,[$sp] \n\ 4732b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson clear.d $r10 \n\ 4742b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson jump 0b \n\ 4752b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 4762b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson4: \n\ 4772b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson movem [$sp+],$r10 \n\ 4782b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 12*4,$r10 \n\ 4792b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson addq 12*4,$r12 \n\ 
4802b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson jump 2b \n\ 4812b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson \n\ 4822b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .previous \n\ 4832b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .section __ex_table,\"a\" \n\ 4842b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .dword 0b,3b \n\ 4852b05d2b3b4d1e59e8710ec9274684d0d13eee34dJesper Nilsson .dword 1b,4b \n\ 4861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds .previous" 4871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn) 4891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Inputs */ : "0" (dst), "1" (n), "2" (retn) 4901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Clobber */ : "r11"); 4911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 4921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds while (n >= 16) 4941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 4951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_clear_16 (dst, retn); 4961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 16; 4971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 4981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 4991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds /* Having a separate by-four loops cuts down on cache footprint. 5001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds FIXME: Test with and without; increasing switch to be 0..15. 
*/ 5011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds while (n >= 4) 5021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 5031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_clear_4 (dst, retn); 5041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds n -= 4; 5051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 5061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 5071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds switch (n) 5081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds { 5091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 0: 5101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 5111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 1: 5121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_clear_1 (dst, retn); 5131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 5141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 2: 5151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_clear_2 (dst, retn); 5161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 5171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds case 3: 5181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds __asm_clear_3 (dst, retn); 5191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds break; 5201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds } 5211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds 5221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds return retn; 5231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds} 524