/*
 * arch/alpha/lib/ev6-clear_user.S
 * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com>
 *
 * Zero user space, handling exceptions as we go.
 *
 * We have to make sure that $0 is always up-to-date and contains the
 * right "bytes left to zero" value (and that it is updated only _after_
 * a successful store).  There is also some rather minor exception setup
 * stuff.
 *
 * NOTE! This is not directly C-callable, because the calling semantics
 * are different:
 *
 * Inputs:
 *	length in $0
 *	destination address in $6
 *	exception pointer in $7
 *	return address in $28 (exceptions expect it there)
 *
 * Outputs:
 *	bytes left to zero in $0
 *
 * Clobbers:
 *	$1,$2,$3,$4,$5,$6
 *
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 * Scheduling notation:
 *	E	- either cluster
 *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 * Try not to change the actual algorithm if possible for consistency.
 * Determining actual stalls (other than slotting) doesn't appear to be easy to do.
 * From perusing the source code context where this routine is called, it is
 * a fair assumption that significant fractions of entire pages are zeroed, so
 * it's going to be worth the effort to hand-unroll a big loop, and use wh64.
 * ASSUMPTION:
 *	The believed purpose of only updating $0 after a store is that a signal
 *	may come along during the execution of this chunk of code, and we don't
 *	want to leave a hole (and we also want to avoid repeating lots of work)
 */

/*
 * EX(insn): emit one instruction that is allowed to fault.
 *
 * The instruction is placed at local label 99.  A record is then added to
 * the __ex_table section consisting of
 *   - a 32-bit offset from the record back to the instruction (99b - .), and
 *   - an lda encoding whose displacement field is ($exception - 99b), i.e.
 *     the distance from the instruction to this file's $exception label.
 * .previous switches back to the section that was active before.
 *
 * NOTE(review): presumably the kernel fault handler looks the faulting PC up
 * in __ex_table and decodes the lda to find where to resume -- confirm
 * against the exception-table fixup code, which is not part of this file.
 *
 * The "y..." tail lets operands that contain commas pass through unchanged.
 */
#define EX(x,y...)			\
	99: x,##y;			\
	.section __ex_table,"a";	\
	.long 99b - .;			\
	lda $31, $exception-99b($31); 	\
	.previous

541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.set noat
551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.set noreorder
561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.align 4
571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.globl __do_clear_user
591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.ent __do_clear_user
601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.frame	$30, 0, $28
611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.prologue 0
621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds				# Pipeline info : Slotting & Comments
641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds__do_clear_user:
651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	and	$6, 7, $4	# .. E  .. ..	: find dest head misalignment
661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	beq	$0, $zerolength # U  .. .. ..	:  U L U L
671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$0, $4, $1	# .. .. .. E	: bias counter
691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	and	$1, 7, $2	# .. .. E  ..	: number of misaligned bytes in tail
701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds# Note - we never actually use $2, so this is a moot computation
711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds# and we can rewrite this later...
721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	srl	$1, 3, $1	# .. E  .. ..	: number of quadwords to clear
731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	beq	$4, $headalign	# U  .. .. ..	: U L U L
741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/*
761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Head is not aligned.  Write (8 - $4) bytes to head of destination
771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * This means $6 is known to be misaligned
781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds */
791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( ldq_u $5, 0($6) )	# .. .. .. L	: load dst word to mask back in
801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	beq	$1, $onebyte	# .. .. U  ..	: sub-word store?
811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mskql	$5, $6, $5	# .. U  .. ..	: take care of misaligned head
821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$6, 8, $6	# E  .. .. .. 	: L U U L
831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $5, -8($6) )	# .. .. .. L	:
851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$1, 1, $1	# .. .. E  ..	:
861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$0, $4, $0	# .. E  .. ..	: bytes left -= 8 - misalignment
871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 8, $0	# E  .. .. ..	: U L U L
881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.align	4
901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/*
911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * (The .align directive ought to be a moot point)
921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * values upon initial entry to the loop
931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * $1 is number of quadwords to clear (zero is a valid value)
941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * $2 is number of trailing bytes (0..7) ($2 never used...)
951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * $6 is known to be aligned 0mod8
961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds */
971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$headalign:
981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$1, 16, $4	# .. .. .. E	: If < 16, we can not use the huge loop
991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	and	$6, 0x3f, $2	# .. .. E  ..	: Forward work for huge loop
1001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$2, 0x40, $3	# .. E  .. ..	: bias counter (huge loop)
1011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	blt	$4, $trailquad	# U  .. .. ..	: U L U L
1021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/*
1041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * We know that we're going to do at least 16 quads, which means we are
1051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * going to be able to use the large block clear loop at least once.
1061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Figure out how many quads we need to clear before we are 0mod64 aligned
1071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * so we can use the wh64 instruction.
1081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds */
1091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. .. E
1111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. E  ..
1121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. E  .. ..
1131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	beq	$3, $bigalign	# U  .. .. ..	: U L U L : Aligned 0mod64
1141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$alignmod64:
1161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 0($6) )	# .. .. .. L
1171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$3, 8, $3	# .. .. E  ..
1181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 8, $0	# .. E  .. ..
1191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E  .. .. ..	: U L U L
1201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. .. E
1221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$1, 1, $1	# .. .. E  ..
1231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$6, 8, $6	# .. E  .. ..
1241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	blt	$3, $alignmod64	# U  .. .. ..	: U L U L
1251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$bigalign:
1271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/*
1281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * $0 is the number of bytes left
1291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * $1 is the number of quads left
1301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * $6 is aligned 0mod64
1311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * we know that we'll be taking a minimum of one trip through
1321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
1331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * We are _not_ going to update $0 after every single store.  That
1341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * would be silly, because there will be cross-cluster dependencies
1351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * no matter how the code is scheduled.  By doing it in slightly
1361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * staggered fashion, we can still do this loop in 5 fetches
1371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * The worse case will be doing two extra quads in some future execution,
1381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * in the event of an interrupted clear.
1391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Assumes the wh64 needs to be for 2 trips through the loop in the future
1401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * The wh64 is issued on for the starting destination address for trip +2
1411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * through the loop, and if there are less than two trips left, the target
1421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * address will be for the current trip.
1431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds */
1441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E :
1451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E :
1461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E :
1471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	bis	$6,$6,$3	# E : U L U L : Initial wh64 address is dest
1481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* This might actually help for the current trip... */
1491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$do_wh64:
1511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	wh64	($3)		# .. .. .. L1	: memory subsystem hint
1521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$1, 16, $4	# .. .. E  ..	: Forward calculation - repeat the loop?
1531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 0($6) )	# .. L  .. ..
1541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 8, $0	# E  .. .. ..	: U L U L
1551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$6, 128, $3	# E : Target address of wh64
1571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 8($6) )	# L :
1581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 16($6) )	# L :
1591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 16, $0	# E : U L L U
1601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E :
1621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 24($6) )	# L :
1631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 32($6) )	# L :
1641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 168, $5	# E : U L L U : two trips through the loop left?
1651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	/* 168 = 192 - 24, since we've already completed some stores */
1661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 16, $0	# E :
1681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 40($6) )	# L :
1691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 48($6) )	# L :
1701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	cmovlt	$5, $6, $3	# E : U L L U : Latency 2, extra mapping cycle
1711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$1, 8, $1	# E :
1731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 16, $0	# E :
1741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 56($6) )	# L :
1751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E : U L U L
1761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E :
1781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 8, $0	# E :
1791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$6, 64, $6	# E :
1801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	bge	$4, $do_wh64	# U : U L U L
1811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$trailquad:
1831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	# zero to 16 quadwords left to store, plus any trailing bytes
1841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	# $1 is the number of quadwords left to go.
1851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	#
1861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. .. E
1871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. E  ..
1881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. E  .. ..
1891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	beq	$1, $trailbytes	# U  .. .. ..	: U L U L : Only 0..7 bytes to go
1901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$onequad:
1921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stq_u $31, 0($6) )	# .. .. .. L
1931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$1, 1, $1	# .. .. E  ..
1941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 8, $0	# .. E  .. ..
1951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# E  .. .. ..	: U L U L
1961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. .. E
1981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. E  ..
1991da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$6, 8, $6	# .. E  .. ..
2001da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	bgt	$1, $onequad	# U  .. .. ..	: U L U L
2011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	# We have an unknown number of bytes left to go.
2031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$trailbytes:
2041da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. .. E
2051da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. E  ..
2061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. E  .. ..
2071da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	beq	$0, $zerolength	# U  .. .. ..	: U L U L
2081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2091da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	# $0 contains the number of bytes left to copy (0..31)
2101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	# so we will use $0 as the loop counter
2111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	# We know for a fact that $0 > 0 zero due to previous context
2121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$onebyte:
2131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	EX( stb $31, 0($6) )	# .. .. .. L
2141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	subq	$0, 1, $0	# .. .. E  ..	:
2151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	addq	$6, 1, $6	# .. E  .. ..	:
2161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	bgt	$0, $onebyte	# U  .. .. ..	: U L U L
2171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
2181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$zerolength:
2191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds$exception:			# Destination for exception recovery(?)
2201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. .. E	:
2211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. .. E  ..	:
2221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	nop			# .. E  .. ..	:
2231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	ret	$31, ($28), 1	# L0 .. .. ..	: L U L U
2241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.end __do_clear_user
2251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
226