/*
 * arch/alpha/lib/ev6-clear_user.S
 * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com>
 *
 * Zero user space, handling exceptions as we go.
 *
 * We have to make sure that $0 is always up-to-date and contains the
 * right "bytes left to zero" value (and that it is updated only _after_
 * a successful store).  There is also some rather minor exception setup
 * stuff.
 *
 * NOTE! This is not directly C-callable, because the calling semantics
 * are different:
 *
 * Inputs:
 *	length in $0
 *	destination address in $6
 *	exception pointer in $7
 *	return address in $28 (exceptions expect it there)
 *
 * Outputs:
 *	bytes left to zero in $0
 *
 * Clobbers:
 *	$1,$2,$3,$4,$5,$6
 *
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 * Scheduling notation:
 *	E	- either cluster
 *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 * Try not to change the actual algorithm if possible for consistency.
 * Determining actual stalls (other than slotting) doesn't appear to be easy to do.
 * From perusing the source code context where this routine is called, it is
 * a fair assumption that significant fractions of entire pages are zeroed, so
 * it's going to be worth the effort to hand-unroll a big loop, and use wh64.
 * ASSUMPTION:
 *	The believed purpose of only updating $0 after a store is that a signal
 *	may come along during the execution of this chunk of code, and we don't
 *	want to leave a hole (and we also want to avoid repeating lots of work)
 */

/*
 * Allow an exception for an insn; exit if we get one.
 *
 * Emits the instruction at local label 99 and records an __ex_table entry
 * for it whose fixup target is $exception, encoded relative to the
 * instruction itself.
 */
#define EX(x,y...)			\
	99: x,##y;			\
	.section __ex_table,"a";	\
	.long 99b - .;			\
	lda $31, $exception-99b($31); 	\
	.previous

	.set noat
	.set noreorder
	.align 4

	.globl __do_clear_user
	.ent __do_clear_user
	.frame	$30, 0, $28
	.prologue 0

				# Pipeline info : Slotting & Comments
__do_clear_user:
	and	$6, 7, $4	# .. E  .. ..	: find dest head misalignment
	beq	$0, $zerolength # U  .. .. ..	:  U L U L

	addq	$0, $4, $1	# .. .. .. E	: bias counter
	and	$1, 7, $2	# .. .. E  ..	: number of misaligned bytes in tail
# Note - we never actually use $2, so this is a moot computation
# and we can rewrite this later...
	srl	$1, 3, $1	# .. E  .. ..	: number of quadwords to clear
	beq	$4, $headalign	# U  .. .. ..	: U L U L

/*
 * Head is not aligned.  Write (8 - $4) bytes to head of destination
 * This means $6 is known to be misaligned
 */
	EX( ldq_u $5, 0($6) )	# .. .. .. L	: load dst word to mask back in
	beq	$1, $onebyte	# .. .. U  ..	: sub-word store?
	mskql	$5, $6, $5	# .. U  .. ..	: take care of misaligned head
	addq	$6, 8, $6	# E  .. .. .. 	: L U U L

	EX( stq_u $5, -8($6) )	# .. .. .. L	:
	subq	$1, 1, $1	# .. .. E  ..	:
	addq	$0, $4, $0	# .. E  .. ..	: bytes left -= 8 - misalignment
	subq	$0, 8, $0	# E  .. .. ..	: U L U L

/*
 * $6 is now (original dest + 8) and therefore still carries the head
 * misalignment.  The stq_u stores below ignore the low three address
 * bits, but the trailing stb loop does not: left as-is it would zero
 * bytes past the end of the request and skip the first tail bytes.
 * Round $6 down to the quadword boundary.  The nops keep the following
 * quad-packs aligned.
 */
	bic	$6, 7, $6	# .. .. .. E	: dest is now 0mod8
	nop			# .. .. E  ..
	nop			# .. E  .. ..
	nop			# E  .. .. ..	: U L U L

	.align	4
/*
 * (The .align directive ought to be a moot point)
 * values upon initial entry to the loop
 * $1 is number of quadwords to clear (zero is a valid value)
 * $2 is number of trailing bytes (0..7) ($2 never used...)
 * $6 is known to be aligned 0mod8
 */
$headalign:
	subq	$1, 16, $4	# .. .. .. E	: If < 16, we can not use the huge loop
	and	$6, 0x3f, $2	# .. .. E  ..	: Forward work for huge loop
	subq	$2, 0x40, $3	# .. E  .. ..	: bias counter (huge loop)
	blt	$4, $trailquad	# U  .. .. ..	: U L U L

/*
 * We know that we're going to do at least 16 quads, which means we are
 * going to be able to use the large block clear loop at least once.
 * Figure out how many quads we need to clear before we are 0mod64 aligned
 * so we can use the wh64 instruction.
 */

	nop			# .. .. .. E
	nop			# .. .. E  ..
	nop			# .. E  .. ..
	beq	$3, $bigalign	# U  .. .. ..	: U L U L : Aligned 0mod64

$alignmod64:
	EX( stq_u $31, 0($6) )	# .. .. .. L
	addq	$3, 8, $3	# .. .. E  ..
	subq	$0, 8, $0	# .. E  .. ..
	nop			# E  .. .. ..	: U L U L

	nop			# .. .. .. E
	subq	$1, 1, $1	# .. .. E  ..
	addq	$6, 8, $6	# .. E  .. ..
	blt	$3, $alignmod64	# U  .. .. ..	: U L U L

$bigalign:
/*
 * $0 is the number of bytes left
 * $1 is the number of quads left
 * $6 is aligned 0mod64
 * we know that we'll be taking a minimum of one trip through
 * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
 * We are _not_ going to update $0 after every single store.  That
 * would be silly, because there will be cross-cluster dependencies
 * no matter how the code is scheduled.  By doing it in slightly
 * staggered fashion, we can still do this loop in 5 fetches
 * The worst case will be doing two extra quads in some future execution,
 * in the event of an interrupted clear.
 * Assumes the wh64 needs to be for 2 trips through the loop in the future
 * The wh64 is issued for the starting destination address for trip +2
 * through the loop, and if there are less than two trips left, the target
 * address will be for the current trip.
 */
	nop			# E :
	nop			# E :
	nop			# E :
	bis	$6,$6,$3	# E : U L U L : Initial wh64 address is dest
	/* This might actually help for the current trip... */

$do_wh64:
	wh64	($3)		# .. .. .. L1	: memory subsystem hint
	subq	$1, 16, $4	# .. .. E  ..	: Forward calculation - repeat the loop?
	EX( stq_u $31, 0($6) )	# .. L  .. ..
	subq	$0, 8, $0	# E  .. .. ..	: U L U L

	addq	$6, 128, $3	# E : Target address of wh64
	EX( stq_u $31, 8($6) )	# L :
	EX( stq_u $31, 16($6) )	# L :
	subq	$0, 16, $0	# E : U L L U

	nop			# E :
	EX( stq_u $31, 24($6) )	# L :
	EX( stq_u $31, 32($6) )	# L :
	subq	$0, 168, $5	# E : U L L U : two trips through the loop left?
	/* 168 = 192 - 24, since we've already completed some stores */

	subq	$0, 16, $0	# E :
	EX( stq_u $31, 40($6) )	# L :
	EX( stq_u $31, 48($6) )	# L :
	cmovlt	$5, $6, $3	# E : U L L U : Latency 2, extra mapping cycle

	subq	$1, 8, $1	# E :
	subq	$0, 16, $0	# E :
	EX( stq_u $31, 56($6) )	# L :
	nop			# E : U L U L

	nop			# E :
	subq	$0, 8, $0	# E :
	addq	$6, 64, $6	# E :
	bge	$4, $do_wh64	# U : U L U L

$trailquad:
	# zero to 16 quadwords left to store, plus any trailing bytes
	# $1 is the number of quadwords left to go.
	#
	nop			# .. .. .. E
	nop			# .. .. E  ..
	nop			# .. E  .. ..
	beq	$1, $trailbytes	# U  .. .. ..	: U L U L : Only 0..7 bytes to go

$onequad:
	EX( stq_u $31, 0($6) )	# .. .. .. L
	subq	$1, 1, $1	# .. .. E  ..
	subq	$0, 8, $0	# .. E  .. ..
	nop			# E  .. .. ..	: U L U L

	nop			# .. .. .. E
	nop			# .. .. E  ..
	addq	$6, 8, $6	# .. E  .. ..
	bgt	$1, $onequad	# U  .. .. ..	: U L U L

	# Fewer than 8 bytes (possibly none) are left to go.
$trailbytes:
	nop			# .. .. .. E
	nop			# .. .. E  ..
	nop			# .. E  .. ..
	beq	$0, $zerolength	# U  .. .. ..	: U L U L

	# $0 contains the number of bytes left to zero (1..7)
	# so we will use $0 as the loop counter
	# We know for a fact that $0 > 0 due to previous context
	# $6 must be the exact byte address here: stb uses all address bits
$onebyte:
	EX( stb $31, 0($6) )	# .. .. .. L
	subq	$0, 1, $0	# .. .. E  ..	:
	addq	$6, 1, $6	# .. E  .. ..	:
	bgt	$0, $onebyte	# U  .. .. ..	: U L U L

$zerolength:
$exception:			# Destination for exception recovery(?)
	nop			# .. .. .. E	:
	nop			# .. .. E  ..	:
	nop			# .. E  .. ..	:
	ret	$31, ($28), 1	# L0 .. .. ..	: L U L U
	.end __do_clear_user
