11da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds/*
21da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *
31da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Optimized version of the standard copy_page() function
41da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *
51da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Inputs:
61da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *	in0:	address of target page
71da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *	in1:	address of source page
81da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Output:
91da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *	no return value
101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *
111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * Copyright (C) 1999, 2001 Hewlett-Packard Co
121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *	Stephane Eranian <eranian@hpl.hp.com>
131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *	David Mosberger <davidm@hpl.hp.com>
141da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds *
151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds * 4/06/01 davidm	Tuned to make it perform well both for cached and uncached copies.
161da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds */
171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/asmmacro.h>
181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/page.h>
191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define PIPE_DEPTH	3	/* entries per rotating array (.rotr/.rotp) and the value loaded into ar.ec */
211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define EPI		p[PIPE_DEPTH-1]	/* last rotating predicate; guards every st8 in the copy loop */
221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define lcount		r16	/* holds PAGE_SIZE/64-1 until it is moved into ar.lc */
241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define saved_pr	r17	/* copy of pr taken on entry */
251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define saved_lc	r18	/* copy of ar.lc taken on entry */
261da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define saved_pfs	r19	/* copy of ar.pfs written by alloc */
271da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define src1		r20	/* load pointer, starts at in1, post-incremented by 16 */
281da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define src2		r21	/* load pointer, starts at in1+8, post-incremented by 16 */
291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define tgt1		r22	/* store pointer, starts at in0, post-incremented by 16 */
301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define tgt2		r23	/* store pointer, starts at in0+8, post-incremented by 16 */
311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define srcf		r24	/* lfetch pointer into the source page, starts at in1+512, steps by 64 */
321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define tgtf		r25	/* lfetch pointer into the target page, starts at in0+512, steps by 64 */
331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define tgt_last	r26	/* in0+PAGE_SIZE; lfetch is issued only while tgtf is below it */
341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#define Nrot		((8*PIPE_DEPTH+7)&~7)	/* 8 arrays of PIPE_DEPTH registers, rounded up to a multiple of 8; rotating-register count passed to alloc */
361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
/*
 * copy_page: copy PAGE_SIZE bytes from the page at in1 to the page at in0.
 *
 * The copy is a software-pipelined br.ctop loop over rotating registers.
 * Each pass issues eight 8-byte loads (64 bytes) guarded by p[0] and eight
 * 8-byte stores guarded by EPI; the stores consume the values that were
 * loaded PIPE_DEPTH-1 passes earlier. Two interleaved pointer pairs
 * (src1/src2 and tgt1/tgt2, 8 bytes apart, each stepping by 16) walk the
 * pages, while srcf/tgtf run 512 bytes ahead and lfetch 64 bytes per pass
 * for as long as tgtf is still below the end of the target page.
 *
 * ar.pfs, ar.lc and pr are captured on entry; ar.pfs, ar.lc and the
 * predicate bits selected by the mask in the epilogue are restored before
 * returning. Nothing is returned.
 */
371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus TorvaldsGLOBAL_ENTRY(copy_page)
381da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.prologue
391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.save ar.pfs, saved_pfs
401da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot	// keep old ar.pfs; frame operands are 3, Nrot-3, 0 with Nrot rotating (only in0/in1 are read below)
411da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
	// Name the rotating registers: eight data arrays t1..t8 and the stage
	// predicates p[], each PIPE_DEPTH entries deep.
421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
431da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	      t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.rotp p[PIPE_DEPTH]
451da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
461da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.save ar.lc, saved_lc
471da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov saved_lc=ar.lc	// ar.lc is overwritten below, keep the old value
481da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov ar.ec=PIPE_DEPTH	// epilogue count = number of pipeline stages
491da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
501da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov lcount=PAGE_SIZE/64-1	// one less than the number of 64-byte chunks in a page
511da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.save pr, saved_pr
521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov saved_pr=pr	// predicates are overwritten below, keep the old values
531da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov pr.rot=1<<16	// only bit 16 set in the rotating predicates; presumably p[0] is p16, enabling the load stage first
541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	.body
561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
571da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov src1=in1	// first load stream: source+0, +16, +32, ...
581da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	adds src2=8,in1	// second load stream: source+8, +24, +40, ...
591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov tgt_last = PAGE_SIZE	// becomes in0+PAGE_SIZE after the add in the next group
601da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	;;
611da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	adds tgt2=8,in0	// second store stream: target+8, +24, +40, ...
621da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	add srcf=512,in1	// source prefetch pointer starts 512 bytes in
631da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov ar.lc=lcount	// loop counter for br.ctop
641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov tgt1=in0	// first store stream: target+0, +16, +32, ...
651da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	add tgtf=512,in0	// target prefetch pointer starts 512 bytes in
661da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	add tgt_last = tgt_last, in0	// tgt_last = first address past the target page
671da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	;;
	// Pipelined copy loop. Loads fill tN[0] when p[0] is set; stores drain
	// tN[PIPE_DEPTH-1] when EPI is set. All eight ld8/st8 pairs of one pass
	// together move one 64-byte chunk.
681da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds1:
691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t1[0]=[src1],16
701da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt1]=t1[PIPE_DEPTH-1],16
711da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t2[0]=[src2],16
721da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt2]=t2[PIPE_DEPTH-1],16
731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	cmp.ltu p6,p0 = tgtf, tgt_last	// p6 = (tgtf < tgt_last), unsigned; complementary result goes to p0
741da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	;;
751da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t3[0]=[src1],16
761da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt1]=t3[PIPE_DEPTH-1],16
771da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t4[0]=[src2],16
781da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt2]=t4[PIPE_DEPTH-1],16
791da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	;;
801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t5[0]=[src1],16
811da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt1]=t5[PIPE_DEPTH-1],16
821da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t6[0]=[src2],16
831da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt2]=t6[PIPE_DEPTH-1],16
841da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	;;
851da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t7[0]=[src1],16
861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt1]=t7[PIPE_DEPTH-1],16
871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p[0])	ld8 t8[0]=[src2],16
881da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(EPI)	st8 [tgt2]=t8[PIPE_DEPTH-1],16
891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
	// Both prefetches share p6, which is computed from tgtf only; srcf and
	// tgtf advance in lockstep, so they stop at the same page offset.
901da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p6)	lfetch [srcf], 64
911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds(p6)	lfetch [tgtf], 64
921da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	br.ctop.sptk.few 1b	// loop branch driven by ar.lc and ar.ec
931da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	;;
	// The mask below has bits 16-63 set, so only those predicate bits are
	// written back from saved_pr; p6, written inside the loop, is not.
941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov pr=saved_pr,0xffffffffffff0000	// restore predicates
951da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov ar.pfs=saved_pfs	// restore the value captured by alloc
961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	mov ar.lc=saved_lc	// restore the loop counter seen on entry
971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	br.ret.sptk.many rp
981da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus TorvaldsEND(copy_page)
99