18ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare/*
28ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareCopyright (c) 2010, Intel Corporation
38ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareAll rights reserved.
48ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
58ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareRedistribution and use in source and binary forms, with or without
68ff1a2759a6389bed30d7862d0beb76077032c99Bruce Bearemodification, are permitted provided that the following conditions are met:
78ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
88ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * Redistributions of source code must retain the above copyright notice,
98ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * this list of conditions and the following disclaimer.
108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * Redistributions in binary form must reproduce the above copyright notice,
128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * this list of conditions and the following disclaimer in the documentation
138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * and/or other materials provided with the distribution.
148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * Neither the name of Intel Corporation nor the names of its contributors
168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * may be used to endorse or promote products derived from this software
178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare    * without specific prior written permission.
188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
198ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
208ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
218ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
228ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
238ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
258ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
268ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
288ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare*/
308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
310a490665a3a287cd3aee1e7327f2381222c387c4Liubov Dmitrieva#include "cache.h"
320a490665a3a287cd3aee1e7327f2381222c387c4Liubov Dmitrieva
/* Name of the routine defined by ENTRY (MEMCPY) below.  Defaults to
   memcpy; a definition of MEMCPY made before this point takes
   precedence.  */
338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef MEMCPY
340a490665a3a287cd3aee1e7327f2381222c387c4Liubov Dmitrieva# define MEMCPY	memcpy
358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* L(label) pastes ".L" in front of label, so every branch target in this
   file is spelled as an assembler-local (non-exported) ".L..." name.  */
378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef L
388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define L(label)	.L##label
398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Fallback wrappers for the call-frame-information directives.  Each
   macro is defined only if it does not already exist, and expands to the
   assembler directive of the same name with a leading dot, forwarding
   its arguments unchanged.  */
418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef cfi_startproc
42c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# define cfi_startproc	.cfi_startproc
438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef cfi_endproc
46c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# define cfi_endproc	.cfi_endproc
478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef cfi_rel_offset
508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef cfi_restore
54c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# define cfi_restore(reg)	.cfi_restore reg
558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef cfi_adjust_cfa_offset
588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* ENTRY (name) opens a routine: it marks name as a function symbol,
   exports it with .globl, aligns the entry point to 16 bytes
   (.p2align 4 means 2^4), emits the label itself and starts a CFI
   region.  Pair it with END (name).  Only defined if ENTRY does not
   already exist.  */
618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef ENTRY
62c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# define ENTRY(name)		\
63c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.type name,  @function;		\
64c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.globl name;		\
65c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4;		\
66c47703a521abab120100673d5281f71bc8ba9a49Jack Renname:		\
678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cfi_startproc
688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* END (name) closes a routine opened with ENTRY (name): it ends the CFI
   region and sets the size of symbol name to the distance from its label
   to the current location (.-name).  Only defined if END does not
   already exist.  */
708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef END
71c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# define END(name)		\
72c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cfi_endproc;		\
738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.size name, .-name
748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Offsets of the three arguments, used by the routine as SRC(%esp),
   DEST(%esp) and LEN(%esp).  Consecutive arguments are 4 bytes apart and
   the first one sits at PARMS, which is defined further down in this
   file (macro expansion is lazy, so the order of definition is fine).
   With USE_AS_BCOPY the source pointer is the first argument and the
   destination the second; otherwise the destination comes first.  The
   length is the third argument in both layouts.  */
768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifdef USE_AS_BCOPY
778ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define SRC		PARMS
788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define DEST		SRC+4
798ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define LEN		DEST+4
808ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#else
818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define DEST		PARMS
828ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define SRC		DEST+4
838ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define LEN		SRC+4
848ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
858ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* CFI bookkeeping for a 4-byte push of REG: the CFA offset grows by 4 and
   REG is recorded through cfi_rel_offset at offset 0.  Expands to CFI
   annotations only; the push instruction itself comes from PUSH.  */
86c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#define CFI_PUSH(REG)		\
87c47703a521abab120100673d5281f71bc8ba9a49Jack Ren  cfi_adjust_cfa_offset (4);		\
888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare  cfi_rel_offset (REG, 0)
898ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked as
   restored.  Expands to CFI annotations only.  */
90c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#define CFI_POP(REG)		\
91c47703a521abab120100673d5281f71bc8ba9a49Jack Ren  cfi_adjust_cfa_offset (-4);		\
928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare  cfi_restore (REG)
938ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* PUSH and POP issue the real pushl/popl and the matching CFI annotation
   in one step, so the unwind description tracks every stack change.  */
948ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#define POP(REG)	popl REG; CFI_POP (REG)
968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Argument offset, entry/exit sequences and jump-table dispatch, selected
   by whether the file is built as position-independent code (SHARED or
   __PIC__ defined) or not.  */
975982e33aca9dde3ba7b2487c4866e52c667485faNick Kralevich#if (defined SHARED || defined __PIC__)
/* PIC build: ENTRANCE pushes %ebx, so the arguments sit 4 bytes further
   from %esp than in the non-PIC build (PARMS is 8 here, 4 below).
   RETURN_END pops %ebx again before returning.  */
988ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define PARMS		8		/* Preserve EBX.  */
998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define ENTRANCE	PUSH (%ebx);
1008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define RETURN_END	POP (%ebx); ret
/* RETURN is RETURN_END followed by CFI_PUSH (%ebx); the CFI_PUSH adds CFI
   annotations only, no instruction.  NOTE(review): presumably this
   re-describes %ebx as pushed for the code that follows the ret inside
   the same routine -- confirm.  */
1018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define RETURN		RETURN_END; CFI_PUSH (%ebx)
/* A PIC jump-table entry holds target I as an offset from base B.  */
1028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define JMPTBL(I, B)	I - B
103c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Calls the thunk __x86.get_pc_thunk.<x>.  The thunk is not defined in
   this part of the file; according to the comment inside
   BRANCH_TO_JMPTBL_ENTRY below, the call with x = bx leaves the PC in
   %ebx.  */
1045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
1058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
107c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jump table with relative offsets.  INDEX is a register that contains
108c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	the index into the jump table.   SCALE is the scale of INDEX. */
109c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Overwrites %ebx; the value it held before the dispatch is lost.  */
1108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
111c47703a521abab120100673d5281f71bc8ba9a49Jack Ren    /* We first load PC into EBX.  */		\
112c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx);		\
113c47703a521abab120100673d5281f71bc8ba9a49Jack Ren    /* Get the address of the jump table.  */		\
114c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	addl	$(TABLE - .), %ebx;		\
115c47703a521abab120100673d5281f71bc8ba9a49Jack Ren    /* Get the entry and convert the relative offset to the		\
116c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	absolute	address.  */		\
117c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	addl	(%ebx, INDEX, SCALE), %ebx;		\
118c47703a521abab120100673d5281f71bc8ba9a49Jack Ren    /* We loaded the jump table.  Go.  */		\
119c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jmp	*%ebx
1208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#else
121c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Non-PIC build: ENTRANCE is empty, so nothing is pushed on entry and
   PARMS is 4.  RETURN and RETURN_END are both a plain ret, and a
   jump-table entry is I itself (B is ignored).  */
1228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define PARMS		4
1238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define ENTRANCE
1248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define RETURN_END	ret
1258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define RETURN		RETURN_END
1268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# define JMPTBL(I, B)	I
1278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare/* Branch to an entry in a jump table.  TABLE is a jump table with
129c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	absolute offsets.  INDEX is a register that contains the index into
130c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	the jump table.  SCALE is the scale of INDEX. */
1318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* A single indirect jump through TABLE; no register is modified.  */
132c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
133c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jmp	*TABLE(, INDEX, SCALE)
1348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
1358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.section .text.ssse3,"ax",@progbits
1378ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareENTRY (MEMCPY)
1388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	ENTRANCE
1398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	LEN(%esp), %ecx
1408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	SRC(%esp), %eax
1418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	DEST(%esp), %edx
1428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifdef USE_AS_MEMMOVE
1448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	%eax, %edx
1458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(copy_forward)
1468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	je	L(fwd_write_0bytes)
1478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$32, %ecx
1488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(memmove_bwd)
1498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jmp	L(bk_write_less32bytes_2)
150c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
151c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1528ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(memmove_bwd):
1538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %eax
1548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	%eax, %edx
1558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	SRC(%esp), %eax
1568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(copy_backward)
1578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1588ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(copy_forward):
1598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
1608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$48, %ecx
1618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(48bytesormore)
1628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1638ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_less32bytes):
1648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef USE_AS_MEMMOVE
1658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	%dl, %al
1668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(bk_write)
1678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
1688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edx
1698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %eax
1708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
1718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef USE_AS_MEMMOVE
172c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1738ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_write):
1748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
1758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
1768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
177c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1788ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(48bytesormore):
179c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
180c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movlpd	(%eax), %xmm0
181c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movlpd	8(%eax), %xmm1
182c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movlpd	%xmm0, (%edx)
183c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movlpd	%xmm1, 8(%edx)
184c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1858ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	(%eax), %xmm0
186c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
1878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	PUSH (%edi)
1888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%edx, %edi
1898ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	and	$-16, %edx
1908ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$16, %edx
1918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	%edx, %edi
1928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %ecx
1938ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	%edi, %eax
1948ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifdef SHARED_CACHE_SIZE_HALF
1968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
1978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#else
1985982e33aca9dde3ba7b2487c4866e52c667485faNick Kralevich# if (defined SHARED || defined __PIC__)
199c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
2008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$_GLOBAL_OFFSET_TABLE_, %ebx
2018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
2028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# else
2038ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	__x86_shared_cache_size_half, %ecx
2048ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# endif
2058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
2068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
2078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	mov	%eax, %edi
2088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(large_page)
2098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	and	$0xf, %edi
2108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jz	L(shl_0)
2118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
2128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
213c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2148ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0):
215c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef USE_AS_MEMMOVE
216c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
217c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
218c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
2208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$127, %ecx
2218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	ja	L(shl_0_gobble)
2228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-32(%ecx), %ecx
223c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
224c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2258ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_loop):
2268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax, %edi), %xmm0
2278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm1
2288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
2298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx, %edi)
2308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 16(%edx, %edi)
2318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
2328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(shl_0_end)
2338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
2348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax, %edi), %xmm0
2358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm1
2368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
2378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx, %edi)
2388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 16(%edx, %edi)
2398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
2408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(shl_0_end)
2418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
2428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax, %edi), %xmm0
2438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm1
2448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
2458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx, %edi)
2468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 16(%edx, %edi)
2478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
2488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(shl_0_end)
2498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
2508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax, %edi), %xmm0
2518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm1
2528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
2538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx, %edi)
2548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 16(%edx, %edi)
2558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
256c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
2578ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_end):
2588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
2598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
2608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
2618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %eax
2628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	POP (%edi)
263c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
2648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
265124a542aa4d78040176f65b28f4958540b5d89aaBruce Beare	CFI_PUSH (%edi)
2668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
267c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
268c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(shl_0_gobble):
2698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifdef DATA_CACHE_SIZE_HALF
2708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$DATA_CACHE_SIZE_HALF, %ecx
2718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#else
2725982e33aca9dde3ba7b2487c4866e52c667485faNick Kralevich# if (defined SHARED || defined __PIC__)
273c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
2748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$_GLOBAL_OFFSET_TABLE_, %ebx
2758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
2768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# else
2778ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	__x86_data_cache_size_half, %ecx
2788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# endif
2798ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
280c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
2818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-128(%ecx), %ecx
2828ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(shl_0_gobble_mem_loop)
283c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
284c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2858ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_gobble_cache_loop):
2868ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
2878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x10(%eax), %xmm1
2888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x20(%eax), %xmm2
2898ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x30(%eax), %xmm3
2908ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x40(%eax), %xmm4
2918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x50(%eax), %xmm5
2928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x60(%eax), %xmm6
2938ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x70(%eax), %xmm7
2948ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%eax), %eax
2958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$128, %ecx
2968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
2978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x10(%edx)
2988ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, 0x20(%edx)
2998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, 0x30(%edx)
3008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm4, 0x40(%edx)
3018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm5, 0x50(%edx)
3028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm6, 0x60(%edx)
3038ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm7, 0x70(%edx)
3048ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%edx), %edx
3058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(shl_0_gobble_cache_loop)
3078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$-0x40, %ecx
3088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%ecx), %ecx
3098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jl	L(shl_0_cache_less_64bytes)
3108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
3128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x40, %ecx
3138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x10(%eax), %xmm1
3148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
3158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x10(%edx)
3168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x20(%eax), %xmm0
3178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x30(%eax), %xmm1
3188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x40, %eax
3198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, 0x20(%edx)
3208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x30(%edx)
3218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x40, %edx
322c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
3238ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_cache_less_64bytes):
3248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$0x20, %ecx
3258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(shl_0_cache_less_32bytes)
3268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
3278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x20, %ecx
3288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x10(%eax), %xmm1
3298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x20, %eax
3308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
3318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x10(%edx)
3328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x20, %edx
333c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
3348ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_cache_less_32bytes):
3358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$0x10, %ecx
3368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(shl_0_cache_less_16bytes)
3378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x10, %ecx
3388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
3398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x10, %eax
3408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
3418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x10, %edx
342c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
3438ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_cache_less_16bytes):
3448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edx
3458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %eax
3468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
3478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
348c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
3498ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_gobble_mem_loop):
3508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	prefetcht0 0x1c0(%eax)
3518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	prefetcht0 0x280(%eax)
3528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	prefetcht0 0x1c0(%edx)
3538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
3558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x10(%eax), %xmm1
3568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x20(%eax), %xmm2
3578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x30(%eax), %xmm3
3588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x40(%eax), %xmm4
3598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x50(%eax), %xmm5
3608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x60(%eax), %xmm6
3618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x70(%eax), %xmm7
3628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%eax), %eax
3638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x80, %ecx
3648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
3658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x10(%edx)
3668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, 0x20(%edx)
3678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, 0x30(%edx)
3688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm4, 0x40(%edx)
3698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm5, 0x50(%edx)
3708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm6, 0x60(%edx)
3718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm7, 0x70(%edx)
3728ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%edx), %edx
3738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(shl_0_gobble_mem_loop)
3758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$-0x40, %ecx
3768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%ecx), %ecx
3778ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jl	L(shl_0_mem_less_64bytes)
3788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3798ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
3808ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x40, %ecx
3818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x10(%eax), %xmm1
3828ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3838ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
3848ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x10(%edx)
3858ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3868ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x20(%eax), %xmm0
3878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x30(%eax), %xmm1
3888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x40, %eax
3898ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3908ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, 0x20(%edx)
3918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x30(%edx)
3928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x40, %edx
393c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
3948ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_mem_less_64bytes):
3958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$0x20, %ecx
3968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(shl_0_mem_less_32bytes)
3978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
3988ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x20, %ecx
3998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	0x10(%eax), %xmm1
4008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x20, %eax
4018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
4028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x10(%edx)
4038ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x20, %edx
404c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
4058ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_mem_less_32bytes):
4068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$0x10, %ecx
4078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(shl_0_mem_less_16bytes)
4088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x10, %ecx
4098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	(%eax), %xmm0
4108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x10, %eax
4118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
4128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	$0x10, %edx
413c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
4148ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_0_mem_less_16bytes):
4158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edx
4168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %eax
417c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
4188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * L(shl_1): forward copy for a source pointer that is 1 byte past a 16-byte
 * boundary. All source loads use movaps at offsets -1, 15, 31, 47, 63 from
 * %eax, which requires (%eax - 1) to be 16-byte aligned; stores to %edx use
 * movaps as well, so %edx must be 16-byte aligned.
 * palignr $1 then splices two neighbouring aligned blocks into the 16
 * bytes that start 1 byte into the lower block.
 */
419c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
4208ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_1):
/* Prime %xmm1 with the aligned block that contains the byte at (%eax). */
421c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
422c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-1(%eax), %xmm1
423c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/*
 * memmove build: reload the destination argument from the stack into %edi,
 * read the first source block, and only then store %xmm0 unaligned to
 * (%edi). %xmm0 is loaded before this excerpt -- presumably the first 16
 * source bytes; verify against the entry code.
 */
424c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
425c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-1(%eax), %xmm1
426c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
427c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/*
 * Compare %ecx against half the data cache size: a build-time constant
 * when DATA_CACHE_SIZE_HALF is defined, otherwise the variable
 * __x86_data_cache_size_half (addressed GOT-relative in PIC builds).
 * NOTE(review): the PIC variant overwrites %ebx; where it is saved and
 * restored is not visible in this excerpt.
 */
428c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
429c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
430c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
431c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
432c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
433c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
434c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
435c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
436c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
437c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
438c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Below the threshold (unsigned): use the loop without prefetch. */
439c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_1_no_prefetch)
440c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Pre-bias the count by one 64-byte iteration for the loop test below. */
441c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
442c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
443c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Main loop, 64 bytes per iteration.
 *   %xmm1        aligned block carried over from the previous iteration
 *   %xmm2..%xmm5 the next four aligned source blocks
 *   %xmm7        untouched copy of %xmm5, becomes the next %xmm1
 */
444c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl1LoopStart):
/* Prefetch 0x1c0 bytes ahead of both the load and the store pointer. */
445c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
446c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
447c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	15(%eax), %xmm2
448c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	31(%eax), %xmm3
449c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	47(%eax), %xmm4
450c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	63(%eax), %xmm5
/* Save the last block before palignr overwrites %xmm5. */
451c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
/*
 * Each palignr drops the low byte of the lower block and appends the low
 * byte of the upper block; results are produced from high to low address
 * because every step consumes the still-unmodified lower register.
 */
452c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$1, %xmm4, %xmm5
453c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$1, %xmm3, %xmm4
454c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
455c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$1, %xmm2, %xmm3
456c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
457c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$1, %xmm1, %xmm2
458c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
459c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
460c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
461c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
462c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
/* Continue while the subtraction neither borrows nor yields zero. */
463c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
464c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl1LoopStart)
465c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
466c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl1LoopLeave):
/*
 * Give back 32 of the 64 bytes of bias. A signed result <= 0 goes to
 * L(shl_end_0), which is defined outside this excerpt.
 */
467c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
468c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
469c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Otherwise copy one more 32-byte chunk with the same realignment. */
470c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	15(%eax), %xmm2
471c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	31(%eax), %xmm3
472c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$1, %xmm2, %xmm3
473c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$1, %xmm1, %xmm2
474c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
475c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/* Advance both pointers by the 32 bytes just copied plus %ecx. */
476c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
477c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
478c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
479c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
480c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
481c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
482c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * L(sh_1_no_prefetch): 1-byte realigning copy without prefetch, entered
 * from L(shl_1) when %ecx is below the cache-size threshold. %xmm1 already
 * holds the aligned block that L(shl_1) loaded from -1(%eax).
 */
483c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
484c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_1_no_prefetch):
/*
 * Pre-bias the count by 32, pull %eax back by 1 so that the 16(...) and
 * 32(...) loads below hit aligned addresses, and use %edi as the running
 * byte offset shared by source and destination.
 */
485c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
4868ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-1(%eax), %eax
4878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
4888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
489c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Loop unrolled twice, 32 bytes per half. The carried block alternates
 * between %xmm1 (input of the first half) and %xmm4 (input of the second).
 * The conditional jump at the end of each half tests the flags of that
 * half's "sub $32, %ecx": the movdqa, palignr and lea instructions in
 * between leave the flags untouched.
 */
490c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_1_no_prefetch_loop):
4918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
4928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
4938ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block for the second half before it is overwritten. */
4948ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
4958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$1, %xmm2, %xmm3
4968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$1, %xmm1, %xmm2
/* %edi is advanced first, hence the negative store displacements. */
4978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
4988ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
4998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Leave once the subtraction above borrowed. */
500c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_1_end_no_prefetch_loop)
5018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
5028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
5038ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
5048ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block as the %xmm1 carry for the next first half. */
5058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
5068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$1, %xmm2, %xmm3
5078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$1, %xmm4, %xmm2
5088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
5098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
5108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Repeat while the subtraction above did not borrow. */
511c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_1_no_prefetch_loop)
5128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
513c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_1_end_no_prefetch_loop):
/*
 * Add back the 32 that the final sub took, fold that leftover count into
 * the offset, and move both pointers forward by offset + leftover. The
 * extra +1 on %eax undoes the -1 bias applied at entry.
 */
5148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
5158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
5168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
5178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	1(%edi, %eax), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
518c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
519c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
520c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
521c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
5228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * L(shl_2): forward copy for a source pointer that is 2 bytes past a
 * 16-byte boundary. All source loads use movaps at offsets -2, 14, 30, 46,
 * 62 from %eax, which requires (%eax - 2) to be 16-byte aligned; stores to
 * %edx use movaps as well, so %edx must be 16-byte aligned.
 * palignr $2 then splices two neighbouring aligned blocks into the 16
 * bytes that start 2 bytes into the lower block.
 */
523c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
5248ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_2):
/* Prime %xmm1 with the aligned block that contains the byte at (%eax). */
525c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
526c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-2(%eax), %xmm1
527c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/*
 * memmove build: reload the destination argument from the stack into %edi,
 * read the first source block, and only then store %xmm0 unaligned to
 * (%edi). %xmm0 is loaded before this excerpt -- presumably the first 16
 * source bytes; verify against the entry code.
 */
528c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
529c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-2(%eax), %xmm1
530c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
531c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/*
 * Compare %ecx against half the data cache size: a build-time constant
 * when DATA_CACHE_SIZE_HALF is defined, otherwise the variable
 * __x86_data_cache_size_half (addressed GOT-relative in PIC builds).
 * NOTE(review): the PIC variant overwrites %ebx; where it is saved and
 * restored is not visible in this excerpt.
 */
532c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
533c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
534c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
535c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
536c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
537c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
538c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
539c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
540c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
541c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
542c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Below the threshold (unsigned): use the loop without prefetch. */
543c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_2_no_prefetch)
544c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Pre-bias the count by one 64-byte iteration for the loop test below. */
545c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
546c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
547c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Main loop, 64 bytes per iteration.
 *   %xmm1        aligned block carried over from the previous iteration
 *   %xmm2..%xmm5 the next four aligned source blocks
 *   %xmm7        untouched copy of %xmm5, becomes the next %xmm1
 */
548c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl2LoopStart):
/* Prefetch 0x1c0 bytes ahead of both the load and the store pointer. */
549c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
550c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
551c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	14(%eax), %xmm2
552c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	30(%eax), %xmm3
553c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	46(%eax), %xmm4
554c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	62(%eax), %xmm5
/* Save the last block before palignr overwrites %xmm5. */
555c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
/*
 * Each palignr drops the low 2 bytes of the lower block and appends the
 * low 2 bytes of the upper block; results are produced from high to low
 * address because every step consumes the still-unmodified lower register.
 */
556c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$2, %xmm4, %xmm5
557c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$2, %xmm3, %xmm4
558c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
559c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$2, %xmm2, %xmm3
560c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
561c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$2, %xmm1, %xmm2
562c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
563c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
564c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
565c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
566c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
/* Continue while the subtraction neither borrows nor yields zero. */
567c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
568c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl2LoopStart)
569c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
570c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl2LoopLeave):
/*
 * Give back 32 of the 64 bytes of bias. A signed result <= 0 goes to
 * L(shl_end_0), which is defined outside this excerpt.
 */
571c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
572c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
573c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Otherwise copy one more 32-byte chunk with the same realignment. */
574c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	14(%eax), %xmm2
575c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	30(%eax), %xmm3
576c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$2, %xmm2, %xmm3
577c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$2, %xmm1, %xmm2
578c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
579c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/* Advance both pointers by the 32 bytes just copied plus %ecx. */
580c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
581c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
582c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
583c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
584c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
585c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
586c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * L(sh_2_no_prefetch): 2-byte realigning copy without prefetch, entered
 * from L(shl_2) when %ecx is below the cache-size threshold. %xmm1 already
 * holds the aligned block that L(shl_2) loaded from -2(%eax).
 */
587c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
588c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_2_no_prefetch):
/*
 * Pre-bias the count by 32, pull %eax back by 2 so that the 16(...) and
 * 32(...) loads below hit aligned addresses, and use %edi as the running
 * byte offset shared by source and destination.
 */
589c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
5908ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-2(%eax), %eax
5918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
5928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
593c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Loop unrolled twice, 32 bytes per half. The carried block alternates
 * between %xmm1 (input of the first half) and %xmm4 (input of the second).
 * The conditional jump at the end of each half tests the flags of that
 * half's "sub $32, %ecx": the movdqa, palignr and lea instructions in
 * between leave the flags untouched.
 */
594c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_2_no_prefetch_loop):
5958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
5968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
5978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block for the second half before it is overwritten. */
5988ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
5998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$2, %xmm2, %xmm3
6008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$2, %xmm1, %xmm2
/* %edi is advanced first, hence the negative store displacements. */
6018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
6028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
6038ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Leave once the subtraction above borrowed. */
604c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_2_end_no_prefetch_loop)
6058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
6068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
6078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
6088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block as the %xmm1 carry for the next first half. */
6098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
6108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$2, %xmm2, %xmm3
6118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$2, %xmm4, %xmm2
6128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
6138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
6148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Repeat while the subtraction above did not borrow. */
615c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_2_no_prefetch_loop)
6168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
617c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_2_end_no_prefetch_loop):
/*
 * Add back the 32 that the final sub took, fold that leftover count into
 * the offset, and move both pointers forward by offset + leftover. The
 * extra +2 on %eax undoes the -2 bias applied at entry.
 */
6188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
6198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
6208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
6218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	2(%edi, %eax), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
622c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
623c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
624c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
625c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
6268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * L(shl_3): forward copy for a source pointer that is 3 bytes past a
 * 16-byte boundary. All source loads use movaps at offsets -3, 13, 29, 45,
 * 61 from %eax, which requires (%eax - 3) to be 16-byte aligned; stores to
 * %edx use movaps as well, so %edx must be 16-byte aligned.
 * palignr $3 then splices two neighbouring aligned blocks into the 16
 * bytes that start 3 bytes into the lower block.
 */
627c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
6288ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_3):
/* Prime %xmm1 with the aligned block that contains the byte at (%eax). */
629c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
630c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-3(%eax), %xmm1
631c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/*
 * memmove build: reload the destination argument from the stack into %edi,
 * read the first source block, and only then store %xmm0 unaligned to
 * (%edi). %xmm0 is loaded before this excerpt -- presumably the first 16
 * source bytes; verify against the entry code.
 */
632c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
633c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-3(%eax), %xmm1
634c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
635c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/*
 * Compare %ecx against half the data cache size: a build-time constant
 * when DATA_CACHE_SIZE_HALF is defined, otherwise the variable
 * __x86_data_cache_size_half (addressed GOT-relative in PIC builds).
 * NOTE(review): the PIC variant overwrites %ebx; where it is saved and
 * restored is not visible in this excerpt.
 */
636c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
637c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
638c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
639c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
640c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
641c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
642c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
643c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
644c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
645c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
646c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Below the threshold (unsigned): use the loop without prefetch. */
647c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_3_no_prefetch)
648c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Pre-bias the count by one 64-byte iteration for the loop test below. */
649c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
650c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
651c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Main loop, 64 bytes per iteration.
 *   %xmm1        aligned block carried over from the previous iteration
 *   %xmm2..%xmm5 the next four aligned source blocks
 *   %xmm7        untouched copy of %xmm5, becomes the next %xmm1
 */
652c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl3LoopStart):
/* Prefetch 0x1c0 bytes ahead of both the load and the store pointer. */
653c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
654c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
655c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	13(%eax), %xmm2
656c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	29(%eax), %xmm3
657c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	45(%eax), %xmm4
658c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	61(%eax), %xmm5
/* Save the last block before palignr overwrites %xmm5. */
659c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
/*
 * Each palignr drops the low 3 bytes of the lower block and appends the
 * low 3 bytes of the upper block; results are produced from high to low
 * address because every step consumes the still-unmodified lower register.
 */
660c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$3, %xmm4, %xmm5
661c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$3, %xmm3, %xmm4
662c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
663c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$3, %xmm2, %xmm3
664c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
665c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$3, %xmm1, %xmm2
666c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
667c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
668c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
669c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
670c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
/* Continue while the subtraction neither borrows nor yields zero. */
671c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
672c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl3LoopStart)
673c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
674c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl3LoopLeave):
/*
 * Give back 32 of the 64 bytes of bias. A signed result <= 0 goes to
 * L(shl_end_0), which is defined outside this excerpt.
 */
675c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
676c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
677c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Otherwise copy one more 32-byte chunk with the same realignment. */
678c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	13(%eax), %xmm2
679c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	29(%eax), %xmm3
680c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$3, %xmm2, %xmm3
681c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$3, %xmm1, %xmm2
682c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
683c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/* Advance both pointers by the 32 bytes just copied plus %ecx. */
684c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
685c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
686c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
687c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
688c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
689c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
690c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * L(sh_3_no_prefetch): 3-byte realigning copy without prefetch, entered
 * from L(shl_3) when %ecx is below the cache-size threshold. %xmm1 already
 * holds the aligned block that L(shl_3) loaded from -3(%eax).
 */
691c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
692c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_3_no_prefetch):
/*
 * Pre-bias the count by 32, pull %eax back by 3 so that the 16(...) and
 * 32(...) loads below hit aligned addresses, and use %edi as the running
 * byte offset shared by source and destination.
 */
693c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
6948ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-3(%eax), %eax
6958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
6968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
697c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Loop unrolled twice, 32 bytes per half. The carried block alternates
 * between %xmm1 (input of the first half) and %xmm4 (input of the second).
 * The conditional jump at the end of each half tests the flags of that
 * half's "sub $32, %ecx": the movdqa, palignr and lea instructions in
 * between leave the flags untouched.
 */
698c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_3_no_prefetch_loop):
6998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
7008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
7018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block for the second half before it is overwritten. */
7028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
7038ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$3, %xmm2, %xmm3
7048ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$3, %xmm1, %xmm2
/* %edi is advanced first, hence the negative store displacements. */
7058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
7068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
7078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
7088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Leave once the subtraction above borrowed. */
709c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_3_end_no_prefetch_loop)
7108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
7118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
7128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
7138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block as the %xmm1 carry for the next first half. */
7148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
7158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$3, %xmm2, %xmm3
7168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$3, %xmm4, %xmm2
7178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
7188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
7198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
7208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Repeat while the subtraction above did not borrow. */
721c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_3_no_prefetch_loop)
7228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
723c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_3_end_no_prefetch_loop):
/*
 * Add back the 32 that the final sub took, fold that leftover count into
 * the offset, and move both pointers forward by offset + leftover. The
 * extra +3 on %eax undoes the -3 bias applied at entry.
 */
7248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
7258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
7268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
7278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	3(%edi, %eax), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
728c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
729c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
730c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
731c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
7328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * L(shl_4): forward copy for a source pointer that is 4 bytes past a
 * 16-byte boundary. All source loads use movaps at offsets -4, 12, 28, 44,
 * 60 from %eax, which requires (%eax - 4) to be 16-byte aligned; stores to
 * %edx use movaps as well, so %edx must be 16-byte aligned.
 * palignr $4 then splices two neighbouring aligned blocks into the 16
 * bytes that start 4 bytes into the lower block.
 */
733c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
7348ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_4):
/* Prime %xmm1 with the aligned block that contains the byte at (%eax). */
735c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
736c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-4(%eax), %xmm1
737c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/*
 * memmove build: reload the destination argument from the stack into %edi,
 * read the first source block, and only then store %xmm0 unaligned to
 * (%edi). %xmm0 is loaded before this excerpt -- presumably the first 16
 * source bytes; verify against the entry code.
 */
738c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
739c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-4(%eax), %xmm1
740c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
741c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/*
 * Compare %ecx against half the data cache size: a build-time constant
 * when DATA_CACHE_SIZE_HALF is defined, otherwise the variable
 * __x86_data_cache_size_half (addressed GOT-relative in PIC builds).
 * NOTE(review): the PIC variant overwrites %ebx; where it is saved and
 * restored is not visible in this excerpt.
 */
742c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
743c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
744c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
745c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
746c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
747c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
748c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
749c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
750c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
751c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
752c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Below the threshold (unsigned): use the loop without prefetch. */
753c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_4_no_prefetch)
754c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Pre-bias the count by one 64-byte iteration for the loop test below. */
755c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
756c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
757c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Main loop, 64 bytes per iteration.
 *   %xmm1        aligned block carried over from the previous iteration
 *   %xmm2..%xmm5 the next four aligned source blocks
 *   %xmm7        untouched copy of %xmm5, becomes the next %xmm1
 */
758c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl4LoopStart):
/* Prefetch 0x1c0 bytes ahead of both the load and the store pointer. */
759c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
760c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
761c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	12(%eax), %xmm2
762c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	28(%eax), %xmm3
763c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	44(%eax), %xmm4
764c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	60(%eax), %xmm5
/* Save the last block before palignr overwrites %xmm5. */
765c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
/*
 * Each palignr drops the low 4 bytes of the lower block and appends the
 * low 4 bytes of the upper block; results are produced from high to low
 * address because every step consumes the still-unmodified lower register.
 */
766c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$4, %xmm4, %xmm5
767c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$4, %xmm3, %xmm4
768c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
769c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$4, %xmm2, %xmm3
770c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
771c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$4, %xmm1, %xmm2
772c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
773c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
774c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
775c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
776c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
/* Continue while the subtraction neither borrows nor yields zero. */
777c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
778c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl4LoopStart)
779c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
780c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl4LoopLeave):
/*
 * Give back 32 of the 64 bytes of bias. A signed result <= 0 goes to
 * L(shl_end_0), which is defined outside this excerpt.
 */
781c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
782c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
783c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Otherwise copy one more 32-byte chunk with the same realignment. */
784c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	12(%eax), %xmm2
785c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	28(%eax), %xmm3
786c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$4, %xmm2, %xmm3
787c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$4, %xmm1, %xmm2
788c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
789c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/* Advance both pointers by the 32 bytes just copied plus %ecx. */
790c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
791c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
792c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
793c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
794c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
795c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
796c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * L(sh_4_no_prefetch): 4-byte realigning copy without prefetch, entered
 * from L(shl_4) when %ecx is below the cache-size threshold. %xmm1 already
 * holds the aligned block that L(shl_4) loaded from -4(%eax).
 */
797c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
798c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_4_no_prefetch):
/*
 * Pre-bias the count by 32, pull %eax back by 4 so that the 16(...) and
 * 32(...) loads below hit aligned addresses, and use %edi as the running
 * byte offset shared by source and destination.
 */
799c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
8008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-4(%eax), %eax
8018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
8028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
803c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/*
 * Loop unrolled twice, 32 bytes per half. The carried block alternates
 * between %xmm1 (input of the first half) and %xmm4 (input of the second).
 * The conditional jump at the end of each half tests the flags of that
 * half's "sub $32, %ecx": the movdqa, palignr and lea instructions in
 * between leave the flags untouched.
 */
804c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_4_no_prefetch_loop):
8058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
8068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
8078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block for the second half before it is overwritten. */
8088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
8098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$4, %xmm2, %xmm3
8108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$4, %xmm1, %xmm2
/* %edi is advanced first, hence the negative store displacements. */
8118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
8128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
8138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
8148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Leave once the subtraction above borrowed. */
815c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_4_end_no_prefetch_loop)
8168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
8178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
8188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
8198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
/* Keep the upper block as the %xmm1 carry for the next first half. */
8208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
8218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$4, %xmm2, %xmm3
8228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$4, %xmm4, %xmm2
8238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
8248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
8258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
8268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Repeat while the subtraction above did not borrow. */
827c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_4_no_prefetch_loop)
8288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
829c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_4_end_no_prefetch_loop):
/*
 * Add back the 32 that the final sub took, fold that leftover count into
 * the offset, and move both pointers forward by offset + leftover. The
 * extra +4 on %eax undoes the -4 bias applied at entry.
 */
8308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
8318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
8328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
8338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	4(%edi, %eax), %eax
/*
 * POP and BRANCH_TO_JMPTBL_ENTRY are macros defined outside this excerpt;
 * presumably they restore %edi and jump to the L(table_48bytes_fwd) entry
 * selected by %ecx (scale 4) -- verify against the macro definitions.
 */
834c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
835c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
8368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Presumably unwind bookkeeping only: code following the branch is reached
 * with %edi still on the stack -- confirm against the CFI_PUSH definition.
 */
837c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
838c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
839c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
8408ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_5):
841c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
842c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-5(%eax), %xmm1
843c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
844c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
845c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-5(%eax), %xmm1
846c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
847c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
848c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
849c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
850c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
851c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
852c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
853c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
854c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
855c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
856c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
857c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
858c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
859c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_5_no_prefetch)
860c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
861c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
862c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
863c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
864c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl5LoopStart):
865c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
866c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
867c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	11(%eax), %xmm2
868c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	27(%eax), %xmm3
869c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	43(%eax), %xmm4
870c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	59(%eax), %xmm5
871c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
872c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$5, %xmm4, %xmm5
873c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$5, %xmm3, %xmm4
874c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
875c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$5, %xmm2, %xmm3
876c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
877c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$5, %xmm1, %xmm2
878c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
879c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
880c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
881c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
882c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
883c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
884c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl5LoopStart)
885c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
886c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl5LoopLeave):
887c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
888c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
889c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
890c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	11(%eax), %xmm2
891c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	27(%eax), %xmm3
892c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$5, %xmm2, %xmm3
893c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$5, %xmm1, %xmm2
894c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
895c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
896c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
897c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
898c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
899c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
900c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
901c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
902c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
903c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
904c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_5_no_prefetch):
905c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
9068ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-5(%eax), %eax
9078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
9088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
909c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
910c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_5_no_prefetch_loop):
9118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
9128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
9138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
9148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
9158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$5, %xmm2, %xmm3
9168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$5, %xmm1, %xmm2
9178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
9188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
9198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
9208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
921c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_5_end_no_prefetch_loop)
9228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
9238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
9248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
9258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
9268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
9278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$5, %xmm2, %xmm3
9288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$5, %xmm4, %xmm2
9298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
9308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
9318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
9328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
933c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_5_no_prefetch_loop)
9348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
935c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_5_end_no_prefetch_loop):
9368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
9378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
9388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
9398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	5(%edi, %eax), %eax
940c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
941c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
942c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* L(shl_6): forward copy variant for a source cursor (%eax) that lies 6 bytes
   past a 16-byte boundary.  All loads use movaps/movdqa at (%eax - 6 + 16*k)
   and all bulk stores use movaps/movdqa at %edx, so both addresses must be
   16-byte aligned on entry.  Each output chunk is built with "palignr $6"
   from two neighbouring aligned source chunks.
   Registers as used below: %eax = source cursor, %edx = destination cursor,
   %xmm1 = previously loaded aligned source chunk carried into the next
   palignr, %edi = scratch (a saved copy is popped before leaving).
   NOTE(review): %ecx is presumably the number of bytes still to copy, and the
   dispatcher that jumps here is outside this chunk -- confirm.
   NOTE(review): CFI_PUSH/POP are macros defined outside this chunk;
   presumably CFI_PUSH only re-declares the pushed %edi to the unwinder after
   the preceding POP -- confirm.  */
943c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
9448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
945c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
9468ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_6):
/* Prime %xmm1 with the aligned 16-byte chunk that contains the current
   source position.  */
947c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
948c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-6(%eax), %xmm1
949c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/* memmove build: additionally store %xmm0 (unaligned) to the address read
   from the stack slot DEST+4(%esp).
   NOTE(review): presumably %xmm0 holds the first 16 source bytes and the
   store is delayed until after the source chunk has been read, for the
   overlapping case -- confirm against the entry code.  */
950c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
951c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-6(%eax), %xmm1
952c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
953c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Compare %ecx with half the data cache size (compile-time constant if
   DATA_CACHE_SIZE_HALF is defined, otherwise the variable
   __x86_data_cache_size_half).  Smaller counts take the loop without
   prefetch instructions.  */
954c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
955c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
956c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
957c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
/* PIC build: form the GOT base in %ebx so the variable can be addressed
   with @GOTOFF.
   NOTE(review): this overwrites %ebx; its save/restore is not visible in
   this chunk.  */
958c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
959c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
960c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
961c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
962c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
963c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
964c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Unsigned "below": %ecx < threshold.  */
965c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_6_no_prefetch)
966c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Bias %ecx by -64 so that the "sub $64 / ja" at the loop bottom keeps
   looping only while more than 64 bytes are still left after the
   iteration.  lea is used so the flags are not touched.  */
967c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
968c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
969c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Prefetching loop: 64 bytes per iteration.  */
970c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl6LoopStart):
/* Prefetch 0x1c0 (448) bytes ahead of both cursors.  */
971c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
972c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
/* Load the next four aligned source chunks: offsets 10/26/42/58 are
   -6 + 16, -6 + 32, -6 + 48, -6 + 64.  */
973c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	10(%eax), %xmm2
974c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	26(%eax), %xmm3
975c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	42(%eax), %xmm4
976c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	58(%eax), %xmm5
/* Keep an unshifted copy of the last chunk; it becomes %xmm1 (the carried
   chunk) for the next iteration.  */
977c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
/* Each palignr $6 joins two adjacent chunks and extracts the 16 bytes that
   start 6 bytes into the lower one.  They are processed from the highest
   chunk down so every lower chunk is still unmodified when it is needed.  */
978c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$6, %xmm4, %xmm5
979c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$6, %xmm3, %xmm4
980c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
981c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$6, %xmm2, %xmm3
982c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
983c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$6, %xmm1, %xmm2
984c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
985c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
986c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
987c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
988c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
989c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
990c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl6LoopStart)
991c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Loop exit: %ecx is (bytes left - 64).  Adding 32 makes it
   (bytes left - 32); if that is <= 0 control goes to L(shl_end_0), which is
   defined outside this chunk.  */
992c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl6LoopLeave):
993c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
994c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
995c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* More than 32 bytes left: copy one further 32-byte piece the same way.  */
996c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	10(%eax), %xmm2
997c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	26(%eax), %xmm3
998c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$6, %xmm2, %xmm3
999c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$6, %xmm1, %xmm2
1000c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1001c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/* Move both cursors past the 32 bytes just copied plus the %ecx-byte tail,
   then dispatch on %ecx through L(table_48bytes_fwd) with entry size 4.
   NOTE(review): the table and BRANCH_TO_JMPTBL_ENTRY are defined outside
   this chunk; presumably the entries copy the last %ecx bytes that end at
   %eax/%edx -- confirm.  */
1002c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1003c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1004c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1005c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1006c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1007c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1008c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1009c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Loop without prefetch: 32 bytes per half, unrolled twice.
   Setup: bias %ecx by -32, step %eax back by 6 so it is the aligned base,
   and use %edi as a running byte offset starting at 0.  */
1010c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_6_no_prefetch):
1011c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
10128ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-6(%eax), %eax
10138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
10148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1015c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1016c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_6_no_prefetch_loop):
/* First half: carried chunk comes in through %xmm1 and the new unshifted
   high chunk is kept in %xmm4.  The "sub" is issued early; the following
   movdqa/palignr/lea instructions do not modify the flags, so the jb below
   still tests the borrow from this sub.  */
10178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
10188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
10198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
10208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
10218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$6, %xmm2, %xmm3
10228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$6, %xmm1, %xmm2
10238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
/* %edi was already advanced, hence the -32/-16 displacements.  */
10248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
10258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
10268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Borrow from the sub: fewer than 32 bytes are left.  */
1027c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_6_end_no_prefetch_loop)
10288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Second half: same steps with the roles swapped -- carried chunk comes in
   through %xmm4 and the new one is kept in %xmm1 for the next pass.  */
10298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
10308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
10318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
10328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
10338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$6, %xmm2, %xmm3
10348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$6, %xmm4, %xmm2
10358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
10368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
10378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
10388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* No borrow: at least 32 bytes are left, go round again.  */
1039c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_6_no_prefetch_loop)
10408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Tail: undo the -32 bias so %ecx is the leftover byte count, fold the
   offset plus leftover into both cursors (re-adding the 6 removed from
   %eax), restore %edi and dispatch on %ecx through L(table_48bytes_fwd)
   (defined outside this chunk).  */
1041c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_6_end_no_prefetch_loop):
10428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
10438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
10448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
10458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	6(%edi, %eax), %eax
1046c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1047c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1048c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* L(shl_7): forward copy variant for a source cursor (%eax) that lies 7 bytes
   past a 16-byte boundary.  All loads use movaps/movdqa at (%eax - 7 + 16*k)
   and all bulk stores use movaps/movdqa at %edx, so both addresses must be
   16-byte aligned on entry.  Each output chunk is built with "palignr $7"
   from two neighbouring aligned source chunks.
   Registers as used below: %eax = source cursor, %edx = destination cursor,
   %xmm1 = previously loaded aligned source chunk carried into the next
   palignr, %edi = scratch (a saved copy is popped before leaving).
   NOTE(review): %ecx is presumably the number of bytes still to copy, and the
   dispatcher that jumps here is outside this chunk -- confirm.
   NOTE(review): CFI_PUSH/POP are macros defined outside this chunk;
   presumably CFI_PUSH only re-declares the pushed %edi to the unwinder after
   the preceding POP -- confirm.  */
1049c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
10508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1051c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
10528ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_7):
/* Prime %xmm1 with the aligned 16-byte chunk that contains the current
   source position.  */
1053c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
1054c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-7(%eax), %xmm1
1055c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/* memmove build: additionally store %xmm0 (unaligned) to the address read
   from the stack slot DEST+4(%esp).
   NOTE(review): presumably %xmm0 holds the first 16 source bytes and the
   store is delayed until after the source chunk has been read, for the
   overlapping case -- confirm against the entry code.  */
1056c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1057c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-7(%eax), %xmm1
1058c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1059c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Compare %ecx with half the data cache size (compile-time constant if
   DATA_CACHE_SIZE_HALF is defined, otherwise the variable
   __x86_data_cache_size_half).  Smaller counts take the loop without
   prefetch instructions.  */
1060c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1061c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1062c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1063c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
/* PIC build: form the GOT base in %ebx so the variable can be addressed
   with @GOTOFF.
   NOTE(review): this overwrites %ebx; its save/restore is not visible in
   this chunk.  */
1064c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1065c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1066c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1067c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1068c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1069c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1070c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Unsigned "below": %ecx < threshold.  */
1071c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_7_no_prefetch)
10728ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Bias %ecx by -64 so that the "sub $64 / ja" at the loop bottom keeps
   looping only while more than 64 bytes are still left after the
   iteration.  lea is used so the flags are not touched.  */
1073c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1074c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1075c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Prefetching loop: 64 bytes per iteration.  */
1076c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl7LoopStart):
/* Prefetch 0x1c0 (448) bytes ahead of both cursors.  */
1077c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1078c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
/* Load the next four aligned source chunks: offsets 9/25/41/57 are
   -7 + 16, -7 + 32, -7 + 48, -7 + 64.  */
1079c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	9(%eax), %xmm2
1080c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	25(%eax), %xmm3
1081c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	41(%eax), %xmm4
1082c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	57(%eax), %xmm5
/* Keep an unshifted copy of the last chunk; it becomes %xmm1 (the carried
   chunk) for the next iteration.  */
1083c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
/* Each palignr $7 joins two adjacent chunks and extracts the 16 bytes that
   start 7 bytes into the lower one.  They are processed from the highest
   chunk down so every lower chunk is still unmodified when it is needed.  */
1084c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$7, %xmm4, %xmm5
1085c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$7, %xmm3, %xmm4
1086c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1087c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$7, %xmm2, %xmm3
1088c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
1089c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$7, %xmm1, %xmm2
1090c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1091c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1092c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1093c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1094c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
1095c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
1096c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl7LoopStart)
1097c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Loop exit: %ecx is (bytes left - 64).  Adding 32 makes it
   (bytes left - 32); if that is <= 0 control goes to L(shl_end_0), which is
   defined outside this chunk.  */
1098c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl7LoopLeave):
1099c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
1100c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1101c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* More than 32 bytes left: copy one further 32-byte piece the same way.  */
1102c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	9(%eax), %xmm2
1103c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	25(%eax), %xmm3
1104c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$7, %xmm2, %xmm3
1105c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$7, %xmm1, %xmm2
1106c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1107c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/* Move both cursors past the 32 bytes just copied plus the %ecx-byte tail,
   then dispatch on %ecx through L(table_48bytes_fwd) with entry size 4.
   NOTE(review): the table and BRANCH_TO_JMPTBL_ENTRY are defined outside
   this chunk; presumably the entries copy the last %ecx bytes that end at
   %eax/%edx -- confirm.  */
1108c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1109c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1110c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1111c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1112c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1113c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1114c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1115c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Loop without prefetch: 32 bytes per half, unrolled twice.
   Setup: bias %ecx by -32, step %eax back by 7 so it is the aligned base,
   and use %edi as a running byte offset starting at 0.  */
1116c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_7_no_prefetch):
1117c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
1118c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-7(%eax), %eax
1119c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	xor	%edi, %edi
1120c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1121c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1122c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_7_no_prefetch_loop):
/* First half: carried chunk comes in through %xmm1 and the new unshifted
   high chunk is kept in %xmm4.  The "sub" is issued early; the following
   movdqa/palignr/lea instructions do not modify the flags, so the jb below
   still tests the borrow from this sub.  */
11238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
11248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
11258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
11268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
11278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$7, %xmm2, %xmm3
11288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$7, %xmm1, %xmm2
11298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
/* %edi was already advanced, hence the -32/-16 displacements.  */
11308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
11318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Borrow from the sub: fewer than 32 bytes are left.  */
1132c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_7_end_no_prefetch_loop)
11338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Second half: same steps with the roles swapped -- carried chunk comes in
   through %xmm4 and the new one is kept in %xmm1 for the next pass.  */
11348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
11358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
11368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
11378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
11388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$7, %xmm2, %xmm3
11398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$7, %xmm4, %xmm2
11408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
11418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
11428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* No borrow: at least 32 bytes are left, go round again.  */
1143c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_7_no_prefetch_loop)
11448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Tail: undo the -32 bias so %ecx is the leftover byte count, fold the
   offset plus leftover into both cursors (re-adding the 7 removed from
   %eax), restore %edi and dispatch on %ecx through L(table_48bytes_fwd)
   (defined outside this chunk).  */
1145c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_7_end_no_prefetch_loop):
11468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
11478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
11488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
11498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	7(%edi, %eax), %eax
1150c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1151c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1152c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* L(shl_8): forward copy variant for a source cursor (%eax) that lies 8 bytes
   past a 16-byte boundary.  All loads use movaps/movdqa at (%eax - 8 + 16*k)
   and all bulk stores use movaps/movdqa at %edx, so both addresses must be
   16-byte aligned on entry.  Each output chunk is built with "palignr $8"
   from two neighbouring aligned source chunks.
   Registers as used below: %eax = source cursor, %edx = destination cursor,
   %xmm1 = previously loaded aligned source chunk carried into the next
   palignr, %edi = scratch (a saved copy is popped before leaving).
   NOTE(review): %ecx is presumably the number of bytes still to copy, and the
   dispatcher that jumps here is outside this chunk -- confirm.
   NOTE(review): CFI_PUSH/POP are macros defined outside this chunk;
   presumably CFI_PUSH only re-declares the pushed %edi to the unwinder after
   the preceding POP -- confirm.  */
1153c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
11548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1155c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
11568ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_8):
/* Prime %xmm1 with the aligned 16-byte chunk that contains the current
   source position.  */
1157c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
1158c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-8(%eax), %xmm1
1159c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/* memmove build: additionally store %xmm0 (unaligned) to the address read
   from the stack slot DEST+4(%esp).
   NOTE(review): presumably %xmm0 holds the first 16 source bytes and the
   store is delayed until after the source chunk has been read, for the
   overlapping case -- confirm against the entry code.  */
1160c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1161c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-8(%eax), %xmm1
1162c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1163c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Compare %ecx with half the data cache size (compile-time constant if
   DATA_CACHE_SIZE_HALF is defined, otherwise the variable
   __x86_data_cache_size_half).  Smaller counts take the loop without
   prefetch instructions.  */
1164c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1165c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1166c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1167c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
/* PIC build: form the GOT base in %ebx so the variable can be addressed
   with @GOTOFF.
   NOTE(review): this overwrites %ebx; its save/restore is not visible in
   this chunk.  */
1168c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1169c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1170c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1171c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1172c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1173c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1174c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Unsigned "below": %ecx < threshold.  */
1175c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_8_no_prefetch)
1176c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Bias %ecx by -64 so that the "sub $64 / ja" at the loop bottom keeps
   looping only while more than 64 bytes are still left after the
   iteration.  lea is used so the flags are not touched.  */
1177c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1178c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1179c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Prefetching loop: 64 bytes per iteration.  */
1180c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl8LoopStart):
/* Prefetch 0x1c0 (448) bytes ahead of both cursors.  */
1181c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1182c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
/* Load the next four aligned source chunks: offsets 8/24/40/56 are
   -8 + 16, -8 + 32, -8 + 48, -8 + 64.  */
1183c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	8(%eax), %xmm2
1184c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	24(%eax), %xmm3
1185c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	40(%eax), %xmm4
1186c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	56(%eax), %xmm5
/* Keep an unshifted copy of the last chunk; it becomes %xmm1 (the carried
   chunk) for the next iteration.  */
1187c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
/* Each palignr $8 joins two adjacent chunks and extracts the 16 bytes that
   start 8 bytes into the lower one.  They are processed from the highest
   chunk down so every lower chunk is still unmodified when it is needed.  */
1188c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$8, %xmm4, %xmm5
1189c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$8, %xmm3, %xmm4
1190c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1191c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$8, %xmm2, %xmm3
1192c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
1193c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$8, %xmm1, %xmm2
1194c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1195c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1196c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1197c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1198c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
1199c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
1200c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl8LoopStart)
1201c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Loop exit: %ecx is (bytes left - 64).  Adding 32 makes it
   (bytes left - 32); if that is <= 0 control goes to L(shl_end_0), which is
   defined outside this chunk.
   NOTE(review): this label is spelled L(LoopLeave8), unlike the
   L(Shl5LoopLeave)/L(Shl6LoopLeave)/L(Shl7LoopLeave)/L(Shl9LoopLeave)
   pattern used by the neighbouring variants; left unchanged because
   references from outside this chunk cannot be ruled out.  */
1202c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(LoopLeave8):
1203c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
1204c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1205c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* More than 32 bytes left: copy one further 32-byte piece the same way.  */
1206c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	8(%eax), %xmm2
1207c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	24(%eax), %xmm3
1208c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$8, %xmm2, %xmm3
1209c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$8, %xmm1, %xmm2
1210c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1211c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/* Move both cursors past the 32 bytes just copied plus the %ecx-byte tail,
   then dispatch on %ecx through L(table_48bytes_fwd) with entry size 4.
   NOTE(review): the table and BRANCH_TO_JMPTBL_ENTRY are defined outside
   this chunk; presumably the entries copy the last %ecx bytes that end at
   %eax/%edx -- confirm.  */
1212c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1213c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1214c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1215c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1216c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1217c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1218c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1219c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Loop without prefetch: 32 bytes per half, unrolled twice.
   Setup: bias %ecx by -32, step %eax back by 8 so it is the aligned base,
   and use %edi as a running byte offset starting at 0.  */
1220c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_8_no_prefetch):
1221c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
12228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-8(%eax), %eax
12238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
12248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1225c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1226c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_8_no_prefetch_loop):
/* First half: carried chunk comes in through %xmm1 and the new unshifted
   high chunk is kept in %xmm4.  The "sub" is issued early; the following
   movdqa/palignr/lea instructions do not modify the flags, so the jb below
   still tests the borrow from this sub.  */
12278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
12288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
12298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
12308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
12318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$8, %xmm2, %xmm3
12328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$8, %xmm1, %xmm2
12338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
/* %edi was already advanced, hence the -32/-16 displacements.  */
12348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
12358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Borrow from the sub: fewer than 32 bytes are left.  */
1236c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_8_end_no_prefetch_loop)
12378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Second half: same steps with the roles swapped -- carried chunk comes in
   through %xmm4 and the new one is kept in %xmm1 for the next pass.  */
12388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
12398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
12408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
12418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
12428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$8, %xmm2, %xmm3
12438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$8, %xmm4, %xmm2
12448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
12458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
12468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* No borrow: at least 32 bytes are left, go round again.  */
1247c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_8_no_prefetch_loop)
12488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Tail: undo the -32 bias so %ecx is the leftover byte count, fold the
   offset plus leftover into both cursors (re-adding the 8 removed from
   %eax), restore %edi and dispatch on %ecx through L(table_48bytes_fwd)
   (defined outside this chunk).  */
1249c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_8_end_no_prefetch_loop):
12508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
12518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
12528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
12538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	8(%edi, %eax), %eax
1254c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1255c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1256c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1257c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
12588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1259c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
12608ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_9):
1261c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
1262c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-9(%eax), %xmm1
1263c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1264c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1265c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-9(%eax), %xmm1
1266c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1267c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
1268c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1269c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1270c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1271c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
1272c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1273c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1274c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1275c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1276c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1277c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1278c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
1279c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_9_no_prefetch)
1280c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1281c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1282c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1283c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1284c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl9LoopStart):
1285c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1286c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
1287c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	7(%eax), %xmm2
1288c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	23(%eax), %xmm3
1289c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	39(%eax), %xmm4
1290c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	55(%eax), %xmm5
1291c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
1292c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$9, %xmm4, %xmm5
1293c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$9, %xmm3, %xmm4
1294c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1295c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$9, %xmm2, %xmm3
1296c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
1297c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$9, %xmm1, %xmm2
1298c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1299c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1300c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1301c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1302c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
1303c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
1304c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl9LoopStart)
1305c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1306c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl9LoopLeave):
1307c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
1308c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1309c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1310c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	7(%eax), %xmm2
1311c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	23(%eax), %xmm3
1312c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$9, %xmm2, %xmm3
1313c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$9, %xmm1, %xmm2
1314c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1315c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1316c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1317c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1318c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1319c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1320c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1321c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1322c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1323c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1324c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1325c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_9_no_prefetch):
1326c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
13278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-9(%eax), %eax
13288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
13298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1330c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1331c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_9_no_prefetch_loop):
13328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
13338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
13348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
13358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
13368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$9, %xmm2, %xmm3
13378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$9, %xmm1, %xmm2
13388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
13398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
13408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1341c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_9_end_no_prefetch_loop)
13428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
13438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
13448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
13458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
13468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
13478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$9, %xmm2, %xmm3
13488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$9, %xmm4, %xmm2
13498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
13508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
13518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1352c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_9_no_prefetch_loop)
13538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1354c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_9_end_no_prefetch_loop):
13558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
13568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
13578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
13588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	9(%edi, %eax), %eax
1359c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1360c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
13618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1362c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1363c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * L(shl_10): forward copy path for a source pointer that is 10 bytes past a
 * 16-byte boundary.  All source reads are aligned 16-byte loads
 * (movaps/movdqa fault on a misaligned memory operand, so this path relies
 * on (%eax - 10) and %edx being 16-byte aligned).  Neighbouring source
 * chunks are merged with `palignr $10` and written with aligned stores.
 *
 * Register roles as used in this block:
 *   %eax   source cursor
 *   %edx   destination cursor (target of the aligned stores)
 *   %ecx   byte count, biased by -64 / -32 while inside the loops
 *   %xmm1  most recently loaded source chunk, carried into the next merge
 *   %edi   scratch (DEST pointer / running offset)
 *
 * NOTE(review): the dispatch into this label, the contents of %xmm0,
 * L(shl_end_0), L(table_48bytes_fwd) and the POP / CFI_PUSH / SETUP_PIC_REG /
 * BRANCH_TO_JMPTBL_ENTRY macros are defined outside this block -- confirm
 * their exact contracts there.
 */
1364c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
13658ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_10):
1366c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
/* Prime the carry register with the aligned chunk that contains the first source bytes. */
1367c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-10(%eax), %xmm1
1368c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/*
 * memmove build: same priming load, but it is issued before %xmm0 is stored
 * (unaligned) to the address read from the DEST stack slot.
 * NOTE(review): presumably %xmm0 holds the first 16 source bytes and the load
 * must precede the store because the buffers may overlap -- confirm.
 */
1369c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1370c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-10(%eax), %xmm1
1371c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1372c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/*
 * Compare the count with the half-data-cache threshold, which is either a
 * build-time constant or the __x86_data_cache_size_half variable.
 */
1373c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1374c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1375c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1376c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
/* PIC: the variable is addressed GOT-relative; this overwrites %ebx. */
1377c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1378c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1379c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1380c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1381c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1382c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1383c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Unsigned compare: counts below the threshold use the loop without prefetch. */
1384c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_10_no_prefetch)
1385c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Bias the count by -64 so the `sub $64` at the loop bottom doubles as the exit test. */
1386c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1387c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Prefetching loop: 64 bytes per iteration.
 * Loads four aligned chunks at %eax+6/+22/+38/+54 (= %eax-10+16*k), keeps a
 * copy of the last one in %xmm7, then merges each chunk with its predecessor:
 * `palignr $10, lo, hi` leaves bytes 10..15 of lo followed by bytes 0..9 of
 * hi in hi, i.e. 16 consecutive source bytes starting at the source cursor.
 */
1388c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1389c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl10LoopStart):
/* Prefetch 0x1c0 (448) bytes ahead of both the source and the destination cursor. */
1390c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1391c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
1392c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	6(%eax), %xmm2
1393c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	22(%eax), %xmm3
1394c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	38(%eax), %xmm4
1395c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	54(%eax), %xmm5
/* Keep the unmerged last chunk: it becomes the carry (%xmm1) for the next iteration. */
1396c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
1397c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$10, %xmm4, %xmm5
1398c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$10, %xmm3, %xmm4
1399c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1400c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$10, %xmm2, %xmm3
1401c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
/* First output chunk is merged with the carry from the previous iteration (or the priming load). */
1402c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$10, %xmm1, %xmm2
1403c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1404c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1405c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1406c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1407c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
/* Continue while the biased count stays above zero (unsigned: no borrow and non-zero). */
1408c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
1409c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl10LoopStart)
1410c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Loop exit: give back 32 of the 64-byte bias.  A result <= 0 (signed, the
 * biased count may be negative here) means no further 32-byte step is taken
 * in this block and L(shl_end_0) finishes the copy.
 */
1411c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl10LoopLeave):
1412c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
1413c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1414c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Otherwise copy one more 32-byte pair using the same merge scheme. */
1415c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	6(%eax), %xmm2
1416c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	22(%eax), %xmm3
1417c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$10, %xmm2, %xmm3
1418c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$10, %xmm1, %xmm2
1419c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1420c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1421c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/*
 * Advance both cursors by 32 + %ecx, then dispatch on %ecx (scale 4) through
 * L(table_48bytes_fwd).
 * NOTE(review): presumably the table entries copy the final %ecx bytes
 * relative to the advanced cursors -- confirm at the table definition.
 */
1422c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1423c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1424c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1425c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1426c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* NOTE(review): presumably re-declares %edi as pushed for the unwind info of the code below, which is entered before the POP above -- confirm against the macro. */
1427c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1428c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Path without prefetch: 32 bytes per half-iteration, unrolled twice.
 * %eax is moved back by 10 so it is the aligned base, %edi is a running
 * offset applied to both %eax and %edx, and the count is biased by -32.
 */
1429c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1430c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_10_no_prefetch):
1431c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
14328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-10(%eax), %eax
14338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
14348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1435c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1436c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_10_no_prefetch_loop):
/* First half: carry in %xmm1, carry out %xmm4. */
14378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
/* The flags set by this sub are consumed by the jb below; movdqa, palignr and lea in between leave EFLAGS untouched. */
14388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
14398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
14408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
14418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$10, %xmm2, %xmm3
14428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$10, %xmm1, %xmm2
/* The offset is bumped before the stores, hence the -32/-16 displacements. */
14438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
14448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
14458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Borrow from the sub: the biased count went below zero, leave the loop. */
1446c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_10_end_no_prefetch_loop)
14478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Second half: same step with the carry registers swapped (carry in %xmm4, carry out %xmm1). */
14488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
14498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
14508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
14518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
14528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$10, %xmm2, %xmm3
14538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$10, %xmm4, %xmm2
14548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
14558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
14568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* No borrow: go round again. */
1457c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_10_no_prefetch_loop)
14588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Undo the -32 bias, fold the leftover count into the offset, advance %edx by
 * it, and rebuild the source cursor as base + offset + 10 (restoring the
 * 10-byte displacement removed on entry to this path).
 */
1459c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_10_end_no_prefetch_loop):
14608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
14618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
14628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
14638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	10(%edi, %eax), %eax
/* %edi was used as scratch above; POP reloads it before the table dispatch on %ecx. */
1464c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1465c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1466c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* NOTE(review): presumably restores the "%edi is pushed" unwind state for the code that follows the dispatch -- confirm against the macro. */
1467c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
14688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * L(shl_11): forward copy path for a source pointer that is 11 bytes past a
 * 16-byte boundary.  All source reads are aligned 16-byte loads
 * (movaps/movdqa fault on a misaligned memory operand, so this path relies
 * on (%eax - 11) and %edx being 16-byte aligned).  Neighbouring source
 * chunks are merged with `palignr $11` and written with aligned stores.
 *
 * Register roles as used in this block:
 *   %eax   source cursor
 *   %edx   destination cursor (target of the aligned stores)
 *   %ecx   byte count, biased by -64 / -32 while inside the loops
 *   %xmm1  most recently loaded source chunk, carried into the next merge
 *   %edi   scratch (DEST pointer / running offset)
 *
 * NOTE(review): the dispatch into this label, the contents of %xmm0,
 * L(shl_end_0), L(table_48bytes_fwd) and the POP / CFI_PUSH / SETUP_PIC_REG /
 * BRANCH_TO_JMPTBL_ENTRY macros are defined outside this block -- confirm
 * their exact contracts there.
 */
1469c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
14708ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_11):
1471c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
/* Prime the carry register with the aligned chunk that contains the first source bytes. */
1472c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-11(%eax), %xmm1
1473c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/*
 * memmove build: same priming load, but it is issued before %xmm0 is stored
 * (unaligned) to the address read from the DEST stack slot.
 * NOTE(review): presumably %xmm0 holds the first 16 source bytes and the load
 * must precede the store because the buffers may overlap -- confirm.
 */
1474c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1475c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-11(%eax), %xmm1
1476c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1477c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/*
 * Compare the count with the half-data-cache threshold, which is either a
 * build-time constant or the __x86_data_cache_size_half variable.
 */
1478c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1479c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1480c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1481c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
/* PIC: the variable is addressed GOT-relative; this overwrites %ebx. */
1482c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1483c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1484c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1485c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1486c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1487c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1488c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Unsigned compare: counts below the threshold use the loop without prefetch. */
1489c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_11_no_prefetch)
1490c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Bias the count by -64 so the `sub $64` at the loop bottom doubles as the exit test. */
1491c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1492c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Prefetching loop: 64 bytes per iteration.
 * Loads four aligned chunks at %eax+5/+21/+37/+53 (= %eax-11+16*k), keeps a
 * copy of the last one in %xmm7, then merges each chunk with its predecessor:
 * `palignr $11, lo, hi` leaves bytes 11..15 of lo followed by bytes 0..10 of
 * hi in hi, i.e. 16 consecutive source bytes starting at the source cursor.
 */
1493c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1494c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl11LoopStart):
/* Prefetch 0x1c0 (448) bytes ahead of both the source and the destination cursor. */
1495c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1496c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
1497c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	5(%eax), %xmm2
1498c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	21(%eax), %xmm3
1499c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	37(%eax), %xmm4
1500c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	53(%eax), %xmm5
/* Keep the unmerged last chunk: it becomes the carry (%xmm1) for the next iteration. */
1501c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
1502c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$11, %xmm4, %xmm5
1503c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$11, %xmm3, %xmm4
1504c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1505c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$11, %xmm2, %xmm3
1506c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
/* First output chunk is merged with the carry from the previous iteration (or the priming load). */
1507c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$11, %xmm1, %xmm2
1508c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1509c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1510c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1511c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1512c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
/* Continue while the biased count stays above zero (unsigned: no borrow and non-zero). */
1513c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
1514c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl11LoopStart)
1515c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Loop exit: give back 32 of the 64-byte bias.  A result <= 0 (signed, the
 * biased count may be negative here) means no further 32-byte step is taken
 * in this block and L(shl_end_0) finishes the copy.
 */
1516c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl11LoopLeave):
1517c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
1518c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1519c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Otherwise copy one more 32-byte pair using the same merge scheme. */
1520c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	5(%eax), %xmm2
1521c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	21(%eax), %xmm3
1522c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$11, %xmm2, %xmm3
1523c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$11, %xmm1, %xmm2
1524c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1525c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1526c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/*
 * Advance both cursors by 32 + %ecx, then dispatch on %ecx (scale 4) through
 * L(table_48bytes_fwd).
 * NOTE(review): presumably the table entries copy the final %ecx bytes
 * relative to the advanced cursors -- confirm at the table definition.
 */
1527c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1528c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1529c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1530c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1531c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* NOTE(review): presumably re-declares %edi as pushed for the unwind info of the code below, which is entered before the POP above -- confirm against the macro. */
1532c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1533c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Path without prefetch: 32 bytes per half-iteration, unrolled twice.
 * %eax is moved back by 11 so it is the aligned base, %edi is a running
 * offset applied to both %eax and %edx, and the count is biased by -32.
 */
1534c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1535c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_11_no_prefetch):
1536c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
15378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-11(%eax), %eax
15388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
15398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1540c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1541c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_11_no_prefetch_loop):
/* First half: carry in %xmm1, carry out %xmm4. */
15428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
/* The flags set by this sub are consumed by the jb below; movdqa, palignr and lea in between leave EFLAGS untouched. */
15438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
15448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
15458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
15468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$11, %xmm2, %xmm3
15478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$11, %xmm1, %xmm2
/* The offset is bumped before the stores, hence the -32/-16 displacements. */
15488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
15498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
15508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Borrow from the sub: the biased count went below zero, leave the loop. */
1551c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_11_end_no_prefetch_loop)
15528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Second half: same step with the carry registers swapped (carry in %xmm4, carry out %xmm1). */
15538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
15548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
15558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
15568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
15578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$11, %xmm2, %xmm3
15588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$11, %xmm4, %xmm2
15598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
15608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
15618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* No borrow: go round again. */
1562c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_11_no_prefetch_loop)
15638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Undo the -32 bias, fold the leftover count into the offset, advance %edx by
 * it, and rebuild the source cursor as base + offset + 11 (restoring the
 * 11-byte displacement removed on entry to this path).
 */
1564c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_11_end_no_prefetch_loop):
15658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
15668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
15678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
15688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	11(%edi, %eax), %eax
/* %edi was used as scratch above; POP reloads it before the table dispatch on %ecx. */
1569c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1570c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
15718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* NOTE(review): presumably restores the "%edi is pushed" unwind state for the code that follows the dispatch -- confirm against the macro. */
1572c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1573c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * L(shl_12): forward copy path for a source pointer that is 12 bytes past a
 * 16-byte boundary.  All source reads are aligned 16-byte loads
 * (movaps/movdqa fault on a misaligned memory operand, so this path relies
 * on (%eax - 12) and %edx being 16-byte aligned).  Neighbouring source
 * chunks are merged with `palignr $12` and written with aligned stores.
 *
 * Register roles as used in this block:
 *   %eax   source cursor
 *   %edx   destination cursor (target of the aligned stores)
 *   %ecx   byte count, biased by -64 / -32 while inside the loops
 *   %xmm1  most recently loaded source chunk, carried into the next merge
 *   %edi   scratch (DEST pointer / running offset)
 *
 * NOTE(review): the dispatch into this label, the contents of %xmm0,
 * L(shl_end_0), L(table_48bytes_fwd) and the POP / CFI_PUSH / SETUP_PIC_REG /
 * BRANCH_TO_JMPTBL_ENTRY macros are defined outside this block -- confirm
 * their exact contracts there.
 */
1574c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
15758ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_12):
1576c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
/* Prime the carry register with the aligned chunk that contains the first source bytes. */
1577c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-12(%eax), %xmm1
1578c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
/*
 * memmove build: same priming load, but it is issued before %xmm0 is stored
 * (unaligned) to the address read from the DEST stack slot.
 * NOTE(review): presumably %xmm0 holds the first 16 source bytes and the load
 * must precede the store because the buffers may overlap -- confirm.
 */
1579c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1580c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-12(%eax), %xmm1
1581c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1582c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/*
 * Compare the count with the half-data-cache threshold, which is either a
 * build-time constant or the __x86_data_cache_size_half variable.
 */
1583c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1584c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1585c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1586c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
/* PIC: the variable is addressed GOT-relative; this overwrites %ebx. */
1587c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1588c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1589c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1590c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1591c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1592c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1593c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
/* Unsigned compare: counts below the threshold use the loop without prefetch. */
1594c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_12_no_prefetch)
1595c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Bias the count by -64 so the `sub $64` at the loop bottom doubles as the exit test. */
1596c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1597c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Prefetching loop: 64 bytes per iteration.
 * Loads four aligned chunks at %eax+4/+20/+36/+52 (= %eax-12+16*k), keeps a
 * copy of the last one in %xmm7, then merges each chunk with its predecessor:
 * `palignr $12, lo, hi` leaves bytes 12..15 of lo followed by bytes 0..11 of
 * hi in hi, i.e. 16 consecutive source bytes starting at the source cursor.
 */
1598c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1599c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl12LoopStart):
/* Prefetch 0x1c0 (448) bytes ahead of both the source and the destination cursor. */
1600c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1601c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
1602c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	4(%eax), %xmm2
1603c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	20(%eax), %xmm3
1604c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	36(%eax), %xmm4
1605c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	52(%eax), %xmm5
/* Keep the unmerged last chunk: it becomes the carry (%xmm1) for the next iteration. */
1606c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
1607c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$12, %xmm4, %xmm5
1608c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$12, %xmm3, %xmm4
1609c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1610c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$12, %xmm2, %xmm3
1611c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
/* First output chunk is merged with the carry from the previous iteration (or the priming load). */
1612c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$12, %xmm1, %xmm2
1613c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1614c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1615c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1616c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1617c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
/* Continue while the biased count stays above zero (unsigned: no borrow and non-zero). */
1618c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
1619c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl12LoopStart)
1620c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Loop exit: give back 32 of the 64-byte bias.  A result <= 0 (signed, the
 * biased count may be negative here) means no further 32-byte step is taken
 * in this block and L(shl_end_0) finishes the copy.
 */
1621c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl12LoopLeave):
1622c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
1623c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1624c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Otherwise copy one more 32-byte pair using the same merge scheme. */
1625c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	4(%eax), %xmm2
1626c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	20(%eax), %xmm3
1627c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$12, %xmm2, %xmm3
1628c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$12, %xmm1, %xmm2
1629c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1630c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1631c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
/*
 * Advance both cursors by 32 + %ecx, then dispatch on %ecx (scale 4) through
 * L(table_48bytes_fwd).
 * NOTE(review): presumably the table entries copy the final %ecx bytes
 * relative to the advanced cursors -- confirm at the table definition.
 */
1632c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1633c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1634c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1635c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1636c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* NOTE(review): presumably re-declares %edi as pushed for the unwind info of the code below, which is entered before the POP above -- confirm against the macro. */
1637c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1638c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Path without prefetch: 32 bytes per half-iteration, unrolled twice.
 * %eax is moved back by 12 so it is the aligned base, %edi is a running
 * offset applied to both %eax and %edx, and the count is biased by -32.
 */
1639c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1640c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_12_no_prefetch):
1641c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
16428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-12(%eax), %eax
16438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
16448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1645c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1646c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_12_no_prefetch_loop):
/* First half: carry in %xmm1, carry out %xmm4. */
16478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
/* The flags set by this sub are consumed by the jb below; movdqa, palignr and lea in between leave EFLAGS untouched. */
16488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
16498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
16508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
16518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$12, %xmm2, %xmm3
16528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$12, %xmm1, %xmm2
/* The offset is bumped before the stores, hence the -32/-16 displacements. */
16538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
16548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
16558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* Borrow from the sub: the biased count went below zero, leave the loop. */
1656c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_12_end_no_prefetch_loop)
16578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Second half: same step with the carry registers swapped (carry in %xmm4, carry out %xmm1). */
16588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
16598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
16608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
16618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
16628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$12, %xmm2, %xmm3
16638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$12, %xmm4, %xmm2
16648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
16658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
16668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
/* No borrow: go round again. */
1667c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_12_no_prefetch_loop)
16688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Undo the -32 bias, fold the leftover count into the offset, advance %edx by
 * it, and rebuild the source cursor as base + offset + 12 (restoring the
 * 12-byte displacement removed on entry to this path).
 */
1669c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_12_end_no_prefetch_loop):
16708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
16718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
16728ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
16738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	12(%edi, %eax), %eax
/* %edi was used as scratch above; POP reloads it before the table dispatch on %ecx. */
1674c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1675c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1676c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* NOTE(review): presumably restores the "%edi is pushed" unwind state for the code that follows the dispatch -- confirm against the macro. */
1677c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
16788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1679c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
16808ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_13):
1681c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
1682c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-13(%eax), %xmm1
1683c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1684c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1685c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-13(%eax), %xmm1
1686c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1687c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
1688c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1689c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1690c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1691c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
1692c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1693c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1694c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1695c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1696c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1697c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1698c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
1699c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_13_no_prefetch)
1700c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1701c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1702c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1703c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1704c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl13LoopStart):
1705c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1706c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
1707c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	3(%eax), %xmm2
1708c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	19(%eax), %xmm3
1709c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	35(%eax), %xmm4
1710c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	51(%eax), %xmm5
1711c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
1712c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$13, %xmm4, %xmm5
1713c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$13, %xmm3, %xmm4
1714c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1715c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$13, %xmm2, %xmm3
1716c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
1717c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$13, %xmm1, %xmm2
1718c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1719c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1720c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1721c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1722c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
1723c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
1724c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl13LoopStart)
1725c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1726c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl13LoopLeave):
1727c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
1728c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1729c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1730c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	3(%eax), %xmm2
1731c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	19(%eax), %xmm3
1732c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$13, %xmm2, %xmm3
1733c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$13, %xmm1, %xmm2
1734c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1735c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1736c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1737c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1738c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1739c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
1740c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1741c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* CFI_PUSH re-describes the unwind state for the code below, which runs with
   %edi still pushed (the preceding path ended with POP (%edi)).  */
1742c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1743c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1744c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* sh_13_no_prefetch: shift-13 copy without prefetching, 32 bytes per step.
   %eax is moved back by 13 so that 16(%eax,%edi) can be read with movdqa
   (requires 16-byte alignment); %edi is the running byte offset.  The loop
   is unrolled twice and alternates %xmm1 / %xmm4 as the chunk carried over
   from the previous step.  */
1745c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_13_no_prefetch):
1746c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
17478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-13(%eax), %eax
17488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
17498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1750c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1751c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_13_no_prefetch_loop):
17528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
	/* The carry from this sub is consumed by the jb below; the movdqa,
	   palignr and lea in between leave the flags untouched.  */
17538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
17548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
	/* Keep the unshifted upper chunk for the second half of the loop.  */
17558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
17568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$13, %xmm2, %xmm3
17578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$13, %xmm1, %xmm2
17588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
17598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
17608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1761c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_13_end_no_prefetch_loop)
17628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
	/* Second unrolled step: same work, carried chunk is now %xmm4 and the
	   new one is saved in %xmm1 for the next pass.  */
17638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
17648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
17658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
17668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
17678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$13, %xmm2, %xmm3
17688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$13, %xmm4, %xmm2
17698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
17708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
17718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1772c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_13_no_prefetch_loop)
17738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Reached only after a sub that borrowed, i.e. %ecx was below 32 (unsigned)
   before it; adding 32 back restores that value, the tail byte count.  */
1774c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_13_end_no_prefetch_loop):
17758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
	/* %edi = bytes copied + tail count; advance both pointers by it.  */
17768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
17778ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
	/* The +13 undoes the alignment adjustment made at entry.  */
17788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	13(%edi, %eax), %eax
1779c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1780c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
17818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* CFI_PUSH re-describes the unwind state for the code below, which runs with
   %edi still pushed (the preceding path ended with POP (%edi)).  */
1782c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1783c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1784c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* shl_14: forward copy for a source that sits 14 bytes past a 16-byte
   boundary (all aligned loads below use %eax - 14 / %eax + 2), writing to an
   aligned destination in %edx.  %ecx is presumably the remaining byte count
   -- it is compared against half the data cache size to pick the prefetching
   loop or L(sh_14_no_prefetch).  */
17858ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_14):
1786c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
	/* Prime %xmm1 with the aligned chunk that precedes the source.  */
1787c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-14(%eax), %xmm1
1788c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
	/* NOTE(review): the +4 presumably skips the pushed %edi; %xmm0 is set
	   outside this view (presumably the first 16 source bytes) and is
	   stored unaligned at the original destination only after %xmm1 has
	   been loaded -- confirm against the entry code.  */
1789c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1790c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-14(%eax), %xmm1
1791c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1792c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
1793c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1794c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1795c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1796c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
	/* NOTE(review): this overwrites %ebx; its save/restore is not visible
	   in this part of the file -- confirm.  */
1797c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1798c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1799c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1800c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1801c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1802c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1803c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* Unsigned compare: below the threshold, use the loop without
	   prefetch instructions.  */
1804c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_14_no_prefetch)
1805c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
	/* Bias %ecx by -64 so the loop's own sub/ja decides whether another
	   64-byte iteration follows.  */
1806c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1807c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1808c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Main loop: 64 bytes per iteration, prefetching 0x1c0 (448) bytes ahead on
   both the source and the destination.  */
1809c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl14LoopStart):
1810c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1811c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
1812c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	2(%eax), %xmm2
1813c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	18(%eax), %xmm3
1814c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	34(%eax), %xmm4
1815c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	50(%eax), %xmm5
	/* Save the unshifted last chunk; it becomes %xmm1 for the next pass.  */
1816c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
	/* palignr $14, src, dst: dst = top 2 bytes of src followed by the low
	   14 bytes of dst.  Processed from the highest chunk down because each
	   palignr overwrites the register the next higher one needed as src.  */
1817c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$14, %xmm4, %xmm5
1818c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$14, %xmm3, %xmm4
1819c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1820c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$14, %xmm2, %xmm3
1821c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
1822c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$14, %xmm1, %xmm2
1823c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1824c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1825c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1826c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1827c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
1828c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
	/* Unsigned: loop again only if the sub neither borrowed nor hit 0.  */
1829c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl14LoopStart)
1830c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Loop exit: copy one more 32-byte piece if %ecx + 32 is still positive,
   then dispatch on %ecx through L(table_48bytes_fwd).  */
1831c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl14LoopLeave):
1832c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
	/* Signed test: %ecx <= 0 here means no full 32-byte piece is left.  */
1833c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1834c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1835c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	2(%eax), %xmm2
1836c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	18(%eax), %xmm3
1837c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$14, %xmm2, %xmm3
1838c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$14, %xmm1, %xmm2
1839c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1840c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1841c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
	/* Advance both pointers by 32 + %ecx, so the negative offsets used by
	   the fwd_write_* tails address the last %ecx bytes.  */
1842c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1843c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1844c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
	/* Macro defined outside this view; presumably an indirect jump to the
	   table entry selected by %ecx (4-byte entries) -- confirm.  */
1845c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1846c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* CFI_PUSH re-describes the unwind state for the code below, which runs with
   %edi still pushed (the preceding path ended with POP (%edi)).  */
1847c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1848c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1849c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* sh_14_no_prefetch: shift-14 copy without prefetching, 32 bytes per step.
   %eax is moved back by 14 so that 16(%eax,%edi) can be read with movdqa
   (requires 16-byte alignment); %edi is the running byte offset.  The loop
   is unrolled twice and alternates %xmm1 / %xmm4 as the chunk carried over
   from the previous step.  */
1850c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_14_no_prefetch):
1851c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
18528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-14(%eax), %eax
18538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
18548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1855c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1856c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_14_no_prefetch_loop):
18578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
	/* The carry from this sub is consumed by the jb below; the movdqa,
	   palignr and lea in between leave the flags untouched.  */
18588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
18598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
	/* Keep the unshifted upper chunk for the second half of the loop.  */
18608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
18618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$14, %xmm2, %xmm3
18628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$14, %xmm1, %xmm2
18638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
18648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
18658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1866c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_14_end_no_prefetch_loop)
18678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
	/* Second unrolled step: same work, carried chunk is now %xmm4 and the
	   new one is saved in %xmm1 for the next pass.  */
18688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
18698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
18708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
18718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
18728ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$14, %xmm2, %xmm3
18738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$14, %xmm4, %xmm2
18748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
18758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
18768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1877c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_14_no_prefetch_loop)
18788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Reached only after a sub that borrowed, i.e. %ecx was below 32 (unsigned)
   before it; adding 32 back restores that value, the tail byte count.  */
1879c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_14_end_no_prefetch_loop):
18808ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
	/* %edi = bytes copied + tail count; advance both pointers by it.  */
18818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
18828ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
	/* The +14 undoes the alignment adjustment made at entry.  */
18838ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	14(%edi, %eax), %eax
1884c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1885c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1886c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* CFI_PUSH re-describes the unwind state for the code below, which runs with
   %edi still pushed (the preceding path ended with POP (%edi)).  */
1887c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
18888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1889c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* shl_15: forward copy for a source that sits 15 bytes past a 16-byte
   boundary (all aligned loads below use %eax - 15 / %eax + 1), writing to an
   aligned destination in %edx.  %ecx is presumably the remaining byte count
   -- it is compared against half the data cache size to pick the prefetching
   loop or L(sh_15_no_prefetch).  */
18908ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(shl_15):
1891c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_MEMMOVE
	/* Prime %xmm1 with the aligned chunk that precedes the source.  */
1892c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-15(%eax), %xmm1
1893c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
	/* NOTE(review): the +4 presumably skips the pushed %edi; %xmm0 is set
	   outside this view (presumably the first 16 source bytes) and is
	   stored unaligned at the original destination only after %xmm1 has
	   been loaded -- confirm against the entry code.  */
1894c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
1895c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	-15(%eax), %xmm1
1896c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
1897c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
1898c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef DATA_CACHE_SIZE_HALF
1899c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	$DATA_CACHE_SIZE_HALF, %ecx
1900c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#else
1901c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# if (defined SHARED || defined __PIC__)
	/* NOTE(review): this overwrites %ebx; its save/restore is not visible
	   in this part of the file -- confirm.  */
1902c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	SETUP_PIC_REG(bx)
1903c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$_GLOBAL_OFFSET_TABLE_, %ebx
1904c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
1905c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
1906c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	cmp	__x86_data_cache_size_half, %ecx
1907c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
1908c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* Unsigned compare: below the threshold, use the loop without
	   prefetch instructions.  */
1909c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb L(sh_15_no_prefetch)
1910c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
	/* Bias %ecx by -64 so the loop's own sub/ja decides whether another
	   64-byte iteration follows.  */
1911c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-64(%ecx), %ecx
1912c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1913c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Main loop: 64 bytes per iteration, prefetching 0x1c0 (448) bytes ahead on
   both the source and the destination.  */
1914c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl15LoopStart):
1915c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%eax)
1916c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	prefetcht0 0x1c0(%edx)
1917c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	1(%eax), %xmm2
1918c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	17(%eax), %xmm3
1919c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	33(%eax), %xmm4
1920c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	49(%eax), %xmm5
	/* Save the unshifted last chunk; it becomes %xmm1 for the next pass.  */
1921c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, %xmm7
	/* palignr $15, src, dst: dst = top byte of src followed by the low
	   15 bytes of dst.  Processed from the highest chunk down because each
	   palignr overwrites the register the next higher one needed as src.  */
1922c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$15, %xmm4, %xmm5
1923c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$15, %xmm3, %xmm4
1924c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm5, 48(%edx)
1925c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$15, %xmm2, %xmm3
1926c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%eax), %eax
1927c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$15, %xmm1, %xmm2
1928c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm4, 32(%edx)
1929c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
1930c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm7, %xmm1
1931c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1932c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	64(%edx), %edx
1933c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %ecx
	/* Unsigned: loop again only if the sub neither borrowed nor hit 0.  */
1934c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	ja	L(Shl15LoopStart)
1935c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* Loop exit: copy one more 32-byte piece if %ecx + 32 is still positive,
   then dispatch on %ecx through L(table_48bytes_fwd).  */
1936c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(Shl15LoopLeave):
1937c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	add	$32, %ecx
	/* Signed test: %ecx <= 0 here means no full 32-byte piece is left.  */
1938c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jle	L(shl_end_0)
1939c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1940c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	1(%eax), %xmm2
1941c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	17(%eax), %xmm3
1942c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$15, %xmm2, %xmm3
1943c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	palignr	$15, %xmm1, %xmm2
1944c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1945c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm2, (%edx)
1946c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movaps	%xmm3, 16(%edx)
	/* Advance both pointers by 32 + %ecx, so the negative offsets used by
	   the fwd_write_* tails address the last %ecx bytes.  */
1947c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%edx, %ecx), %edx
1948c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%eax, %ecx), %eax
1949c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
	/* Macro defined outside this view; presumably an indirect jump to the
	   table entry selected by %ecx (4-byte entries) -- confirm.  */
1950c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
1951c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/* CFI_PUSH re-describes the unwind state for the code below, which runs with
   %edi still pushed (the preceding path ended with POP (%edi)).  */
1952c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1953c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1954c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* sh_15_no_prefetch: shift-15 copy without prefetching, 32 bytes per step.
   %eax is moved back by 15 so that 16(%eax,%edi) can be read with movdqa
   (requires 16-byte alignment); %edi is the running byte offset.  The loop
   is unrolled twice and alternates %xmm1 / %xmm4 as the chunk carried over
   from the previous step.  */
1955c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_15_no_prefetch):
1956c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	-32(%ecx), %ecx
19578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-15(%eax), %eax
19588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	xor	%edi, %edi
19598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
1960c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
1961c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_15_no_prefetch_loop):
19628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
	/* The carry from this sub is consumed by the jb below; the movdqa,
	   palignr and lea in between leave the flags untouched.  */
19638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
19648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
	/* Keep the unshifted upper chunk for the second half of the loop.  */
19658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm4
19668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$15, %xmm2, %xmm3
19678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$15, %xmm1, %xmm2
19688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
19698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
19708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1971c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jb	L(sh_15_end_no_prefetch_loop)
19728ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
	/* Second unrolled step: same work, carried chunk is now %xmm4 and the
	   new one is saved in %xmm1 for the next pass.  */
19738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	16(%eax, %edi), %xmm2
19748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
19758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	32(%eax, %edi), %xmm3
19768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, %xmm1
19778ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$15, %xmm2, %xmm3
19788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	palignr	$15, %xmm4, %xmm2
19798ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%edi), %edi
19808ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, -32(%edx, %edi)
19818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, -16(%edx, %edi)
1982c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	jae	L(sh_15_no_prefetch_loop)
19838ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Reached only after a sub that borrowed, i.e. %ecx was below 32 (unsigned)
   before it; adding 32 back restores that value, the tail byte count.  */
1984c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(sh_15_end_no_prefetch_loop):
19858ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	32(%ecx), %ecx
	/* %edi = bytes copied + tail count; advance both pointers by it.  */
19868ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edi
19878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%edi, %edx
	/* The +15 undoes the alignment adjustment made at entry.  */
19888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	15(%edi, %eax), %eax
1989c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
1990c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
19918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* CFI_PUSH re-describes the unwind state for the code below, which runs with
   %edi still pushed (the preceding path ended with POP (%edi)).  */
1992c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
1993c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
1994c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* shl_end_0: shared exit for the Shl<N>LoopLeave paths when no further
   32-byte piece is copied.  The callers visible here arrive with %ecx <= 0
   after their `add $32`; adding 32 once more yields the count used to
   advance both pointers and to index L(table_48bytes_fwd).  */
1995c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(shl_end_0):
1996c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	32(%ecx), %ecx
	/* Advance both pointers by %ecx, so the negative offsets used by the
	   fwd_write_* tails address the last %ecx bytes.  */
1997c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	(%edx, %ecx), %edx
1998c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	(%eax, %ecx), %eax
1999c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP	(%edi)
	/* Macro defined outside this view; presumably an indirect jump to the
	   table entry selected by %ecx (4-byte entries) -- confirm.  */
2000c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
20018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
2002c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Tail copies for 44, 36, 28, 20, 12 and 4 bytes.  Offsets are negative, so
   %eax (source) and %edx (destination) address the byte just past the data.
   Each label copies 8 bytes through %xmm0 and falls through to the label for
   8 bytes fewer; the last 4 bytes go through %ecx.  */
20038ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_44bytes):
2004c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-44(%eax), %xmm0
2005c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -44(%edx)
20068ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_36bytes):
2007c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-36(%eax), %xmm0
2008c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -36(%edx)
20098ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_28bytes):
2010c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-28(%eax), %xmm0
2011c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -28(%edx)
20128ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_20bytes):
2013c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-20(%eax), %xmm0
2014c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -20(%edx)
20158ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_12bytes):
2016c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-12(%eax), %xmm0
2017c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -12(%edx)
20188ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_4bytes):
20198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	-4(%eax), %ecx
20208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%ecx, -4(%edx)
/* Return value in %eax: none for the bcopy build, %edx (the address just
   past the copied data) for mempcpy, otherwise the DEST stack argument.  */
2021c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2022c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2023c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2024c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2025c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2026c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2027c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* RETURN is a macro defined outside this view.  */
2028c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2029c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
2030c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Tail copies for 40, 32, 24, 16, 8 and 0 bytes.  Offsets are negative, so
   %eax (source) and %edx (destination) address the byte just past the data.
   Each label copies 8 bytes through %xmm0 and falls through to the label for
   8 bytes fewer; fwd_write_0bytes only sets the return value.  */
2031c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_40bytes):
2032c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-40(%eax), %xmm0
2033c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -40(%edx)
2034c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_32bytes):
2035c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-32(%eax), %xmm0
2036c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -32(%edx)
2037c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_24bytes):
2038c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-24(%eax), %xmm0
2039c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -24(%edx)
2040c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_16bytes):
2041c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-16(%eax), %xmm0
2042c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -16(%edx)
2043c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_8bytes):
2044c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-8(%eax), %xmm0
2045c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -8(%edx)
20468ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(fwd_write_0bytes):
/* Return value in %eax: none for the bcopy build, %edx (the address just
   past the copied data) for mempcpy, otherwise the DEST stack argument.  */
20478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef USE_AS_BCOPY
20488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# ifdef USE_AS_MEMPCPY
20498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%edx, %eax
20508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# else
20518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	DEST(%esp), %eax
20528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# endif
20538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
	/* RETURN is a macro defined outside this view.  */
20548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	RETURN
20558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
2056c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Tail copy for exactly 5 bytes: two overlapping 4-byte moves covering
   offsets [-5,-1) and [-4,0) relative to %eax / %edx.  Both loads are issued
   before either store, and %eax itself is reused as the second temporary, so
   the source pointer is dead after the second load.  */
2057c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_5bytes):
2058c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-5(%eax), %ecx
2059c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-4(%eax), %eax
2060c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -5(%edx)
2061c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%eax, -4(%edx)
/* Return value in %eax: none for the bcopy build, %edx (the address just
   past the copied data) for mempcpy, otherwise the DEST stack argument.  */
2062c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2063c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2064c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2065c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2066c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2067c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2068c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* RETURN is a macro defined outside this view.  */
2069c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2070c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
2071c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Tail copies for 45, 37, 29, 21 and 13 bytes.  Offsets are negative, so
   %eax (source) and %edx (destination) address the byte just past the data.
   Each label copies 8 bytes through %xmm0 and falls through to the label for
   8 bytes fewer; the final 5 bytes are copied as 4 bytes + 1 byte.  */
2072c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_45bytes):
2073c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-45(%eax), %xmm0
2074c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -45(%edx)
2075c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_37bytes):
2076c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-37(%eax), %xmm0
2077c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -37(%edx)
2078c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_29bytes):
2079c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-29(%eax), %xmm0
2080c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -29(%edx)
2081c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_21bytes):
2082c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-21(%eax), %xmm0
2083c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -21(%edx)
2084c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_13bytes):
2085c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-13(%eax), %xmm0
2086c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -13(%edx)
	/* Remaining 5 bytes: 4 bytes at -5, then the last byte at -1.  */
2087c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-5(%eax), %ecx
2088c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -5(%edx)
2089c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %ecx
2090c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%cl, -1(%edx)
/* Return value in %eax: none for the bcopy build, %edx (the address just
   past the copied data) for mempcpy, otherwise the DEST stack argument.  */
2091c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2092c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2093c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2094c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2095c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2096c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2097c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* RETURN is a macro defined outside this view.  */
2098c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2099c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
2100c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Tail copies for 41, 33, 25, 17, 9 and 1 bytes.  Offsets are negative, so
   %eax (source) and %edx (destination) address the byte just past the data.
   Each label copies 8 bytes through %xmm0 and falls through to the label for
   8 bytes fewer; the last byte goes through %cl.  */
2101c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_41bytes):
2102c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-41(%eax), %xmm0
2103c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -41(%edx)
2104c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_33bytes):
2105c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-33(%eax), %xmm0
2106c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -33(%edx)
2107c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_25bytes):
2108c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-25(%eax), %xmm0
2109c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -25(%edx)
2110c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_17bytes):
2111c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-17(%eax), %xmm0
2112c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -17(%edx)
2113c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_9bytes):
2114c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-9(%eax), %xmm0
2115c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -9(%edx)
2116c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_1bytes):
2117c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %ecx
2118c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%cl, -1(%edx)
/* Return value in %eax: none for the bcopy build, %edx (the address just
   past the copied data) for mempcpy, otherwise the DEST stack argument.  */
2119c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2120c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2121c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2122c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2123c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2124c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2125c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* RETURN is a macro defined outside this view.  */
2126c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2127c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
2128c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Tail copies for 46, 38, 30, 22, 14 and 6 bytes.  Offsets are negative, so
   %eax (source) and %edx (destination) address the byte just past the data.
   Each label copies 8 bytes through %xmm0 and falls through to the label for
   8 bytes fewer; the final 6 bytes are copied as 4 bytes + 2 bytes.  */
2129c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_46bytes):
2130c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-46(%eax), %xmm0
2131c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -46(%edx)
2132c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_38bytes):
2133c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-38(%eax), %xmm0
2134c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -38(%edx)
2135c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_30bytes):
2136c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-30(%eax), %xmm0
2137c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -30(%edx)
2138c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_22bytes):
2139c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-22(%eax), %xmm0
2140c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -22(%edx)
2141c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_14bytes):
2142c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-14(%eax), %xmm0
2143c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -14(%edx)
2144c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_6bytes):
2145c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-6(%eax), %ecx
2146c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -6(%edx)
2147c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-2(%eax), %ecx
2148c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -2(%edx)
/* Return value in %eax: none for the bcopy build, %edx (the address just
   past the copied data) for mempcpy, otherwise the DEST stack argument.  */
2149c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2150c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2151c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2152c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2153c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2154c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2155c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* RETURN is a macro defined outside this view.  */
2156c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2157c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
2158c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Tail copies for 42, 34, 26, 18, 10 and 2 bytes.  Offsets are negative, so
   %eax (source) and %edx (destination) address the byte just past the data.
   Each label copies 8 bytes through %xmm0 and falls through to the label for
   8 bytes fewer; the last 2 bytes go through %cx.  */
2159c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_42bytes):
2160c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-42(%eax), %xmm0
2161c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -42(%edx)
2162c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_34bytes):
2163c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-34(%eax), %xmm0
2164c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -34(%edx)
2165c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_26bytes):
2166c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-26(%eax), %xmm0
2167c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -26(%edx)
2168c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_18bytes):
2169c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-18(%eax), %xmm0
2170c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -18(%edx)
2171c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_10bytes):
2172c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-10(%eax), %xmm0
2173c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -10(%edx)
2174c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_2bytes):
2175c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-2(%eax), %ecx
2176c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -2(%edx)
/* Return value in %eax: none for the bcopy build, %edx (the address just
   past the copied data) for mempcpy, otherwise the DEST stack argument.  */
2177c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2178c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2179c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2180c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2181c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2182c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2183c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
	/* RETURN is a macro defined outside this view.  */
2184c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2185c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Forward-copy tail chain for remaining lengths 47, 39, 31, 23, 15 and 7.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves one
 * 8-byte chunk through %xmm0 and falls through to the label for N-8.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2186c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2187c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_47bytes):
2188c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-47(%eax), %xmm0
2189c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -47(%edx)
2190c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_39bytes):
2191c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-39(%eax), %xmm0
2192c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -39(%edx)
2193c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_31bytes):
2194c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-31(%eax), %xmm0
2195c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -31(%edx)
2196c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_23bytes):
2197c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-23(%eax), %xmm0
2198c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -23(%edx)
2199c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_15bytes):
2200c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-15(%eax), %xmm0
2201c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -15(%edx)
2202c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_7bytes):
/* Final 7 bytes as 4 + 2 + 1.  The 2-byte and 1-byte loads are both issued
   before their stores; the byte load overwrites %eax, which is not used as
   the source pointer after this point.  */
2203c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-7(%eax), %ecx
2204c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -7(%edx)
2205c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-3(%eax), %ecx
2206c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %eax
2207c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -3(%edx)
2208c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%al, -1(%edx)
2209c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2210c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2211c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2212c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2213c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2214c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2215c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2216c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2217c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Forward-copy tail chain for remaining lengths 43, 35, 27, 19, 11 and 3.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves one
 * 8-byte chunk through %xmm0 and falls through to the label for N-8.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2218c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2219c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_43bytes):
2220c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-43(%eax), %xmm0
2221c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -43(%edx)
2222c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_35bytes):
2223c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-35(%eax), %xmm0
2224c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -35(%edx)
2225c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_27bytes):
2226c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-27(%eax), %xmm0
2227c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -27(%edx)
2228c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_19bytes):
2229c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-19(%eax), %xmm0
2230c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -19(%edx)
2231c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_11bytes):
2232c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-11(%eax), %xmm0
2233c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -11(%edx)
2234c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_3bytes):
/* Final 3 bytes as 2 + 1.  Both loads precede both stores; the byte load
   overwrites %eax, which is not used as the source pointer afterwards.  */
2235c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-3(%eax), %ecx
2236c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %eax
2237c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -3(%edx)
2238c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%al, -1(%edx)
2239c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2240c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2241c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2242c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2243c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2244c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2245c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2246c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2247c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 40, 24, 8 and 0.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 40 and 24 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The last 8 bytes use movq; the 0-byte label only produces the return value.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2248c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2249c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_40bytes_align):
2250c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-40(%eax), %xmm0
2251c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -40(%edx)
2252c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_24bytes_align):
2253c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-24(%eax), %xmm0
2254c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -24(%edx)
2255c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_8bytes_align):
2256c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-8(%eax), %xmm0
2257c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -8(%edx)
2258c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_0bytes_align):
2259c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2260c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2261c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2262c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2263c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2264c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2265c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2266c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2267c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 32 and 16.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination), 16 bytes per step with
 * movdqa.  movdqa requires 16-byte aligned memory operands, so %eax and %edx
 * must be 16-byte aligned here
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2268c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2269c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_32bytes_align):
2270c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-32(%eax), %xmm0
2271c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -32(%edx)
2272c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_16bytes_align):
2273c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-16(%eax), %xmm0
2274c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -16(%edx)
2275c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2276c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2277c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2278c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2279c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2280c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2281c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2282c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2283c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Copies the 5 bytes that end at %eax (source) to the 5 bytes that end at
 * %edx (destination) using two overlapping 4-byte moves (offsets -5 and -4).
 * Both loads are issued before either store; the second load overwrites
 * %eax, which is not used as the source pointer afterwards.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2284c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2285c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_5bytes_align):
2286c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-5(%eax), %ecx
2287c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-4(%eax), %eax
2288c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -5(%edx)
2289c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%eax, -4(%edx)
2290c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2291c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2292c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2293c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2294c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2295c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2296c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2297c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2298c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 45, 29 and 13.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 45 and 29 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2299c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2300c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_45bytes_align):
2301c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-45(%eax), %xmm0
2302c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -45(%edx)
2303c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_29bytes_align):
2304c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-29(%eax), %xmm0
2305c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -29(%edx)
2306c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_13bytes_align):
/* Final 13 bytes as 8 + 4 + 1.  */
2307c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-13(%eax), %xmm0
2308c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -13(%edx)
23098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	-5(%eax), %ecx
23108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%ecx, -5(%edx)
2311c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %ecx
2312c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%cl, -1(%edx)
23138ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef USE_AS_BCOPY
23148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# ifdef USE_AS_MEMPCPY
23158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%edx, %eax
23168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# else
23178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	DEST(%esp), %eax
23188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# endif
23198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
23208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	RETURN
23218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Aligned forward-copy tail chain for remaining lengths 37 and 21.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves 16 bytes
 * with movdqa, which requires 16-byte aligned memory operands, so %eax-N and
 * %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2322c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2323c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_37bytes_align):
2324c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-37(%eax), %xmm0
2325c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -37(%edx)
2326c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_21bytes_align):
2327c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-21(%eax), %xmm0
2328c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -21(%edx)
/* Final 5 bytes as 4 + 1.  */
23298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	-5(%eax), %ecx
23308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%ecx, -5(%edx)
23318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movzbl	-1(%eax), %ecx
23328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movb	%cl, -1(%edx)
23338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef USE_AS_BCOPY
23348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# ifdef USE_AS_MEMPCPY
23358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%edx, %eax
23368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# else
23378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	DEST(%esp), %eax
23388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# endif
23398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
23408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	RETURN
23418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Aligned forward-copy tail chain for remaining lengths 41, 25, 9 and 1.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 41 and 25 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The 9 step moves 8 bytes with movq and the 1 step moves the last byte.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2342c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2343c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_41bytes_align):
2344c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-41(%eax), %xmm0
2345c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -41(%edx)
2346c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_25bytes_align):
2347c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-25(%eax), %xmm0
2348c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -25(%edx)
2349c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_9bytes_align):
2350c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-9(%eax), %xmm0
2351c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -9(%edx)
2352c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_1bytes_align):
2353c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %ecx
2354c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%cl, -1(%edx)
2355c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2356c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2357c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2358c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2359c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2360c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2361c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2362c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2363c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 33 and 17.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves 16 bytes
 * with movdqa, which requires 16-byte aligned memory operands, so %eax-N and
 * %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The last byte is moved through %cl.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2364c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2365c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_33bytes_align):
2366c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-33(%eax), %xmm0
2367c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -33(%edx)
2368c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_17bytes_align):
2369c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-17(%eax), %xmm0
2370c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -17(%edx)
2371c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %ecx
2372c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%cl, -1(%edx)
2373c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2374c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2375c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2376c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2377c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2378c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2379c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2380c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2381c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 46, 30, 14 and 6.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 46 and 30 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The 14 step moves 8 bytes with movq; the 6 step finishes with 4 + 2.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2382c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2383c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_46bytes_align):
2384c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-46(%eax), %xmm0
2385c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -46(%edx)
2386c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_30bytes_align):
2387c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-30(%eax), %xmm0
2388c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -30(%edx)
2389c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_14bytes_align):
2390c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-14(%eax), %xmm0
2391c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -14(%edx)
2392c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_6bytes_align):
23938ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	-6(%eax), %ecx
23948ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%ecx, -6(%edx)
23958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movzwl	-2(%eax), %ecx
23968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movw	%cx, -2(%edx)
23978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef USE_AS_BCOPY
23988ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# ifdef USE_AS_MEMPCPY
23998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%edx, %eax
24008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# else
24018ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	DEST(%esp), %eax
24028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# endif
24038ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
24048ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	RETURN
24058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Aligned forward-copy tail chain for remaining lengths 38 and 22.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves 16 bytes
 * with movdqa, which requires 16-byte aligned memory operands, so %eax-N and
 * %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2406c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2407c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_38bytes_align):
2408c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-38(%eax), %xmm0
2409c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -38(%edx)
2410c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_22bytes_align):
2411c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-22(%eax), %xmm0
2412c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -22(%edx)
/* Final 6 bytes as 4 + 2.  */
2413c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-6(%eax), %ecx
2414c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -6(%edx)
2415c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-2(%eax), %ecx
2416c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -2(%edx)
2417c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2418c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2419c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2420c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2421c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2422c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2423c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2424c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2425c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 42, 26, 10 and 2.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 42 and 26 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The 10 step moves 8 bytes with movq and the 2 step moves the last 2 bytes.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2426c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2427c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_42bytes_align):
2428c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-42(%eax), %xmm0
2429c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -42(%edx)
2430c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_26bytes_align):
2431c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-26(%eax), %xmm0
2432c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -26(%edx)
2433c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_10bytes_align):
2434c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-10(%eax), %xmm0
2435c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -10(%edx)
2436c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_2bytes_align):
2437c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-2(%eax), %ecx
2438c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -2(%edx)
2439c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2440c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2441c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2442c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2443c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2444c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2445c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2446c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2447c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 34 and 18.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves 16 bytes
 * with movdqa, which requires 16-byte aligned memory operands, so %eax-N and
 * %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The last 2 bytes are moved through %cx.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2448c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2449c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_34bytes_align):
2450c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-34(%eax), %xmm0
2451c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -34(%edx)
2452c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_18bytes_align):
2453c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-18(%eax), %xmm0
2454c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -18(%edx)
2455c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-2(%eax), %ecx
2456c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -2(%edx)
2457c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2458c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2459c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2460c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2461c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2462c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2463c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2464c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2465c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 47, 31, 15 and 7.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 47 and 31 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The 15 step moves 8 bytes with movq; the 7 step finishes with 4 + 2 + 1.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2466c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2467c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_47bytes_align):
2468c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-47(%eax), %xmm0
2469c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -47(%edx)
2470c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_31bytes_align):
2471c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-31(%eax), %xmm0
2472c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -31(%edx)
2473c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_15bytes_align):
2474c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-15(%eax), %xmm0
2475c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -15(%edx)
2476c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_7bytes_align):
/* The 2-byte and 1-byte loads are both issued before their stores; the byte
   load overwrites %eax, which is not used as the source pointer afterwards.  */
24778ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	-7(%eax), %ecx
24788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%ecx, -7(%edx)
24798ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movzwl	-3(%eax), %ecx
24808ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movzbl	-1(%eax), %eax
24818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movw	%cx, -3(%edx)
24828ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movb	%al, -1(%edx)
24838ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifndef USE_AS_BCOPY
24848ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# ifdef USE_AS_MEMPCPY
24858ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%edx, %eax
24868ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# else
24878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	DEST(%esp), %eax
24888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare# endif
24898ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
2490c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2491c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 39 and 23.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves 16 bytes
 * with movdqa, which requires 16-byte aligned memory operands, so %eax-N and
 * %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2492c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2493c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_39bytes_align):
2494c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-39(%eax), %xmm0
2495c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -39(%edx)
2496c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_23bytes_align):
2497c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-23(%eax), %xmm0
2498c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -23(%edx)
/* Final 7 bytes as 4 + 2 + 1.  The 2-byte and 1-byte loads are both issued
   before their stores; the byte load overwrites %eax, which is not used as
   the source pointer afterwards.  */
2499c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-7(%eax), %ecx
2500c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -7(%edx)
2501c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-3(%eax), %ecx
2502c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %eax
2503c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -3(%edx)
2504c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%al, -1(%edx)
2505c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2506c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2507c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2508c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2509c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2510c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2511c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2512c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2513c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 43, 27, 11 and 3.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 43 and 27 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The 11 step moves 8 bytes with movq; the 3 step finishes with 2 + 1.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2514c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2515c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_43bytes_align):
2516c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-43(%eax), %xmm0
2517c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -43(%edx)
2518c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_27bytes_align):
2519c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-27(%eax), %xmm0
2520c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -27(%edx)
2521c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_11bytes_align):
2522c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-11(%eax), %xmm0
2523c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -11(%edx)
2524c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_3bytes_align):
/* Both loads precede both stores; the byte load overwrites %eax, which is
   not used as the source pointer afterwards.  */
2525c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-3(%eax), %ecx
2526c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %eax
2527c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -3(%edx)
2528c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%al, -1(%edx)
2529c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2530c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2531c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2532c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2533c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2534c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2535c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2536c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2537c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 35 and 19.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves 16 bytes
 * with movdqa, which requires 16-byte aligned memory operands, so %eax-N and
 * %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2538c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2539c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_35bytes_align):
2540c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-35(%eax), %xmm0
2541c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -35(%edx)
2542c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_19bytes_align):
2543c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-19(%eax), %xmm0
2544c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -19(%edx)
/* Final 3 bytes as 2 + 1.  Both loads precede both stores; the byte load
   overwrites %eax, which is not used as the source pointer afterwards.  */
2545c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	-3(%eax), %ecx
2546c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	-1(%eax), %eax
2547c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movw	%cx, -3(%edx)
2548c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movb	%al, -1(%edx)
2549c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2550c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2551c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2552c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2553c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2554c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2555c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2556c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2557c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 44, 28, 12 and 4.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  The 44 and 28 steps move
 * 16 bytes each with movdqa, which requires 16-byte aligned memory operands,
 * so %eax-N and %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The 12 step moves 8 bytes with movq and the 4 step moves the last 4 bytes.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax before RETURN.
 */
2558c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2559c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_44bytes_align):
2560c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-44(%eax), %xmm0
2561c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -44(%edx)
2562c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_28bytes_align):
2563c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-28(%eax), %xmm0
2564c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -28(%edx)
2565c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_12bytes_align):
2566c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-12(%eax), %xmm0
2567c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -12(%edx)
2568c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_4bytes_align):
2569c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-4(%eax), %ecx
2570c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -4(%edx)
2571c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2572c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2573c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2574c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2575c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2576c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2577c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2578c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	RETURN
2579c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Aligned forward-copy tail chain for remaining lengths 36 and 20.
 * Entering at the label for N copies the N bytes that end at %eax (source)
 * to the N bytes that end at %edx (destination).  Each label moves 16 bytes
 * with movdqa, which requires 16-byte aligned memory operands, so %eax-N and
 * %edx-N must be 16-byte aligned on entry
 * (NOTE(review): ensured by the dispatching code outside this view -- confirm).
 * The last 4 bytes are moved through %ecx.
 * The epilogue does not set %eax for USE_AS_BCOPY, sets %eax = %edx for
 * USE_AS_MEMPCPY, and otherwise reloads DEST(%esp) into %eax.  This block
 * ends with RETURN_END rather than RETURN (NOTE(review): macro defined
 * outside this view; presumably a RETURN variant for the last return before
 * a section with different CFI state -- confirm).
 */
2580c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
2581c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_36bytes_align):
2582c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-36(%eax), %xmm0
2583c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -36(%edx)
2584c47703a521abab120100673d5281f71bc8ba9a49Jack RenL(fwd_write_20bytes_align):
2585c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	-20(%eax), %xmm0
2586c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqa	%xmm0, -20(%edx)
2587c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	-4(%eax), %ecx
2588c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%ecx, -4(%edx)
2589c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifndef USE_AS_BCOPY
2590c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# ifdef USE_AS_MEMPCPY
2591c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edx, %eax
2592c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# else
2593c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST(%esp), %eax
2594c47703a521abab120100673d5281f71bc8ba9a49Jack Ren# endif
2595c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
2596124a542aa4d78040176f65b28f4958540b5d89aaBruce Beare	RETURN_END
25978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Entry of the non-temporal (movntdq) copy path.  Copies one 16-byte block
 * from (%eax) to (%edx), advances both pointers by 16 and biases %ecx for
 * L(large_page_loop).
 * NOTE(review): CFI_PUSH and POP are macros defined outside this view;
 * presumably %edi was pushed on the path that jumps here, which would explain
 * the DEST+4 stack offset below -- confirm.
 */
2598c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
2599c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
2600c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
26018ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(large_page):
/* Unaligned 16-byte load from the source.  */
26028ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	(%eax), %xmm1
2603c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#ifdef USE_AS_MEMMOVE
/* memmove only: store %xmm0 at the address read from DEST+4(%esp).
   NOTE(review): %xmm0 is set before this view; presumably it holds the first
   16 source bytes saved earlier -- confirm.  */
2604c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	DEST+4(%esp), %edi
2605c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	%xmm0, (%edi)
2606c47703a521abab120100673d5281f71bc8ba9a49Jack Ren#endif
26078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	16(%eax), %eax
/* movntdq is a non-temporal store and requires a 16-byte aligned address,
   so %edx must be 16-byte aligned here.  */
26088ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm1, (%edx)
26098ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	16(%edx), %edx
/* 0x90 = 0x10 for the block just copied + 0x80 of bias that the loop's
   "sub $0x80 / jae" test relies on.  */
26108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	-0x90(%ecx), %ecx
26118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	POP (%edi)
2612c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Main loop of the non-temporal copy path: each iteration loads 128 bytes
 * from (%eax) with unaligned loads into %xmm0-%xmm7 and writes them to (%edx)
 * with movntdq (which requires %edx to be 16-byte aligned), advancing both
 * pointers by 0x80.  %ecx is kept biased by -0x80 (see the lea -0x90 in
 * L(large_page); presumably %ecx is the remaining byte count there), so the
 * carry from "sub $0x80" tells whether another full 128-byte block remains.
 * After the loop an optional 64-byte block is copied, then control falls
 * through to L(large_page_less_64bytes) with the bias removed from %ecx.
 */
2613c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
26148ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(large_page_loop):
26158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	(%eax), %xmm0
26168ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x10(%eax), %xmm1
26178ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x20(%eax), %xmm2
26188ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x30(%eax), %xmm3
26198ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x40(%eax), %xmm4
26208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x50(%eax), %xmm5
26218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x60(%eax), %xmm6
26228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x70(%eax), %xmm7
26238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%eax), %eax
26248ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* The flags set here are consumed by the jae below; the movntdq stores and
   the lea in between do not modify flags.  */
26258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x80, %ecx
26268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm0, (%edx)
26278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm1, 0x10(%edx)
26288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm2, 0x20(%edx)
26298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm3, 0x30(%edx)
26308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm4, 0x40(%edx)
26318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm5, 0x50(%edx)
26328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm6, 0x60(%edx)
26338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm7, 0x70(%edx)
26348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%edx), %edx
26358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(large_page_loop)
/* Compare the still-biased count against -0x40, then remove the bias with
   lea (which leaves the cmp flags intact): the jl is taken when fewer than
   64 bytes remain.  */
26368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$-0x40, %ecx
26378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x80(%ecx), %ecx
26388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jl	L(large_page_less_64bytes)
26398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* At least 64 bytes remain: copy one 64-byte block.  */
26408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	(%eax), %xmm0
26418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x10(%eax), %xmm1
26428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x20(%eax), %xmm2
26438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x30(%eax), %xmm3
26448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x40(%eax), %eax
26458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
26468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm0, (%edx)
26478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm1, 0x10(%edx)
26488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm2, 0x20(%edx)
26498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm3, 0x30(%edx)
26508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x40(%edx), %edx
26518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x40, %ecx
26528ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(large_page_less_64bytes):
26538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$32, %ecx
26548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(large_page_less_32bytes)
26558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	(%eax), %xmm0
26568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqu	0x10(%eax), %xmm1
26578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x20(%eax), %eax
26588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm0, (%edx)
26598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movntdq	%xmm1, 0x10(%edx)
26608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	0x20(%edx), %edx
26618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$0x20, %ecx
26628ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(large_page_less_32bytes):
26638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %edx
26648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	add	%ecx, %eax
26658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sfence
26668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
26678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Tail copy for 44, 36, 28, 20, 12, 4 or 0 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  Entering at L(bk_write_Nbytes) copies bytes [0, N):
 * each stage moves the highest 8 bytes still outstanding through %xmm0
 * and falls through to the next smaller size, so stores proceed from
 * high to low addresses.  The labels are presumably reached through
 * L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_44bytes):
	movq	36(%eax), %xmm0
	movq	%xmm0, 36(%edx)
L(bk_write_36bytes):
	movq	28(%eax), %xmm0
	movq	%xmm0, 28(%edx)
L(bk_write_28bytes):
	movq	20(%eax), %xmm0
	movq	%xmm0, 20(%edx)
L(bk_write_20bytes):
	movq	12(%eax), %xmm0
	movq	%xmm0, 12(%edx)
L(bk_write_12bytes):
	movq	4(%eax), %xmm0
	movq	%xmm0, 4(%edx)
L(bk_write_4bytes):
	/* Last 4 bytes go through %ecx, which is free to clobber here.  */
	movl	(%eax), %ecx
	movl	%ecx, (%edx)
L(bk_write_0bytes):
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN are macros defined elsewhere in the file.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN
26968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Tail copy for 40, 32, 24, 16 or 8 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  Each stage copies the highest 8 bytes still
 * outstanding through %xmm0 and falls through to the next smaller size,
 * so stores proceed from high to low addresses.  The labels are
 * presumably reached through L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_40bytes):
	movq	32(%eax), %xmm0
	movq	%xmm0, 32(%edx)
L(bk_write_32bytes):
	movq	24(%eax), %xmm0
	movq	%xmm0, 24(%edx)
L(bk_write_24bytes):
	movq	16(%eax), %xmm0
	movq	%xmm0, 16(%edx)
L(bk_write_16bytes):
	movq	8(%eax), %xmm0
	movq	%xmm0, 8(%edx)
L(bk_write_8bytes):
	movq	(%eax), %xmm0
	movq	%xmm0, (%edx)
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN are macros defined elsewhere in the file.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN
2721c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Tail copy for 45, 37, 29, 21, 13, 5 or 1 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  The 8-byte stages cover offsets 5 and up, the 4-byte
 * stage covers offsets 1..4 and the final stage copies byte 0.  Each
 * stage falls through to the next, so stores proceed from high to low
 * addresses.  The labels are presumably reached through
 * L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_45bytes):
	movq	37(%eax), %xmm0
	movq	%xmm0, 37(%edx)
L(bk_write_37bytes):
	movq	29(%eax), %xmm0
	movq	%xmm0, 29(%edx)
L(bk_write_29bytes):
	movq	21(%eax), %xmm0
	movq	%xmm0, 21(%edx)
L(bk_write_21bytes):
	movq	13(%eax), %xmm0
	movq	%xmm0, 13(%edx)
L(bk_write_13bytes):
	movq	5(%eax), %xmm0
	movq	%xmm0, 5(%edx)
L(bk_write_5bytes):
	/* Bytes 1..4 via %ecx.  */
	movl	1(%eax), %ecx
	movl	%ecx, 1(%edx)
L(bk_write_1bytes):
	/* Byte 0.  */
	movzbl	(%eax), %ecx
	movb	%cl, (%edx)
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN are macros defined elsewhere in the file.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN
27528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Tail copy for 41, 33, 25, 17 or 9 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  The 8-byte stages cover offsets 1 and up; the last
 * two instructions copy byte 0.  Each stage falls through to the next,
 * so stores proceed from high to low addresses.  The labels are
 * presumably reached through L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_41bytes):
	movq	33(%eax), %xmm0
	movq	%xmm0, 33(%edx)
L(bk_write_33bytes):
	movq	25(%eax), %xmm0
	movq	%xmm0, 25(%edx)
L(bk_write_25bytes):
	movq	17(%eax), %xmm0
	movq	%xmm0, 17(%edx)
L(bk_write_17bytes):
	movq	9(%eax), %xmm0
	movq	%xmm0, 9(%edx)
L(bk_write_9bytes):
	/* Bytes 1..8, then byte 0 via %ecx.  */
	movq	1(%eax), %xmm0
	movq	%xmm0, 1(%edx)
	movzbl	(%eax), %ecx
	movb	%cl, (%edx)
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN are macros defined elsewhere in the file.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN
2779c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Tail copy for 46, 38, 30, 22, 14 or 6 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  The 8-byte stages cover offsets 6 and up; the final
 * stage copies bytes 2..5 and then bytes 0..1.  Each stage falls through
 * to the next, so stores proceed from high to low addresses.  The labels
 * are presumably reached through L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_46bytes):
	movq	38(%eax), %xmm0
	movq	%xmm0, 38(%edx)
L(bk_write_38bytes):
	movq	30(%eax), %xmm0
	movq	%xmm0, 30(%edx)
L(bk_write_30bytes):
	movq	22(%eax), %xmm0
	movq	%xmm0, 22(%edx)
L(bk_write_22bytes):
	movq	14(%eax), %xmm0
	movq	%xmm0, 14(%edx)
L(bk_write_14bytes):
	movq	6(%eax), %xmm0
	movq	%xmm0, 6(%edx)
L(bk_write_6bytes):
	/* Bytes 2..5, then bytes 0..1, both via %ecx.  */
	movl	2(%eax), %ecx
	movl	%ecx, 2(%edx)
	movzwl	(%eax), %ecx
	movw	%cx, (%edx)
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN are macros defined elsewhere in the file.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN
2809c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Tail copy for 42, 34, 26, 18, 10 or 2 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  The 8-byte stages cover offsets 2 and up; the final
 * stage copies bytes 0..1.  Each stage falls through to the next, so
 * stores proceed from high to low addresses.  The labels are presumably
 * reached through L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_42bytes):
	movq	34(%eax), %xmm0
	movq	%xmm0, 34(%edx)
L(bk_write_34bytes):
	movq	26(%eax), %xmm0
	movq	%xmm0, 26(%edx)
L(bk_write_26bytes):
	movq	18(%eax), %xmm0
	movq	%xmm0, 18(%edx)
L(bk_write_18bytes):
	movq	10(%eax), %xmm0
	movq	%xmm0, 10(%edx)
L(bk_write_10bytes):
	movq	2(%eax), %xmm0
	movq	%xmm0, 2(%edx)
L(bk_write_2bytes):
	/* Bytes 0..1 via %ecx.  */
	movzwl	(%eax), %ecx
	movw	%cx, (%edx)
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN are macros defined elsewhere in the file.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN
28378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Tail copy for 47, 39, 31, 23, 15 or 7 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  The 8-byte stages cover offsets 7 and up; the final
 * stage copies bytes 3..6, bytes 1..2 and byte 0.  Each stage falls
 * through to the next, so stores proceed from high to low addresses.
 * The labels are presumably reached through L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_47bytes):
	movq	39(%eax), %xmm0
	movq	%xmm0, 39(%edx)
L(bk_write_39bytes):
	movq	31(%eax), %xmm0
	movq	%xmm0, 31(%edx)
L(bk_write_31bytes):
	movq	23(%eax), %xmm0
	movq	%xmm0, 23(%edx)
L(bk_write_23bytes):
	movq	15(%eax), %xmm0
	movq	%xmm0, 15(%edx)
L(bk_write_15bytes):
	movq	7(%eax), %xmm0
	movq	%xmm0, 7(%edx)
L(bk_write_7bytes):
	/* Bytes 3..6 and 1..2 via %ecx.  */
	movl	3(%eax), %ecx
	movl	%ecx, 3(%edx)
	movzwl	1(%eax), %ecx
	movw	%cx, 1(%edx)
	/* Byte 0 is loaded into %eax itself: the source pointer is not read
	   again after this instruction.  */
	movzbl	(%eax), %eax
	movb	%al, (%edx)
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN are macros defined elsewhere in the file.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN
2869c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Tail copy for 43, 35, 27, 19, 11 or 3 remaining bytes.
 *
 * %eax = source, %edx = destination, both addressing the first of the
 * remaining bytes.  The 8-byte stages cover offsets 3 and up; the final
 * stage copies bytes 1..2 and byte 0.  Each stage falls through to the
 * next, so stores proceed from high to low addresses.  The labels are
 * presumably reached through L(table_48_bytes_bwd).
 */
	.p2align 4
L(bk_write_43bytes):
	movq	35(%eax), %xmm0
	movq	%xmm0, 35(%edx)
L(bk_write_35bytes):
	movq	27(%eax), %xmm0
	movq	%xmm0, 27(%edx)
L(bk_write_27bytes):
	movq	19(%eax), %xmm0
	movq	%xmm0, 19(%edx)
L(bk_write_19bytes):
	movq	11(%eax), %xmm0
	movq	%xmm0, 11(%edx)
L(bk_write_11bytes):
	movq	3(%eax), %xmm0
	movq	%xmm0, 3(%edx)
L(bk_write_3bytes):
	/* Bytes 1..2 via %ecx.  */
	movzwl	1(%eax), %ecx
	movw	%cx, 1(%edx)
	/* Byte 0 is loaded into %eax itself: the source pointer is not read
	   again after this instruction.  */
	movzbl	(%eax), %eax
	movb	%al, (%edx)
	/* Return value: nothing for bcopy; otherwise the value in the DEST
	   stack slot, plus the value in the LEN slot when built as mempcpy.
	   DEST, LEN and RETURN_END are macros defined elsewhere in the file;
	   this is the only return in this group that uses RETURN_END rather
	   than RETURN.  */
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN_END
28998ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
29008ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Jump table for the forward tail copy.  Entry N (N = 0..47) refers to
 * L(fwd_write_Nbytes).  Each entry is one 32-bit .int, so the table is
 * indexed with a scale of 4.  JMPTBL is a macro defined elsewhere in the
 * file; it receives the target label and the table base.
 *
 * The .pushsection places this and the following data in the read-only,
 * allocatable section .rodata.ssse3; the matching .popsection is outside
 * this excerpt.
 */
	.pushsection .rodata.ssse3,"a",@progbits
	.p2align 2
L(table_48bytes_fwd):
	.int	JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
29528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/*
 * Jump table for the "_align" variants of the forward tail copy.
 * Entry N (N = 0..47) refers to L(fwd_write_Nbytes_align).  Each entry is
 * one 32-bit .int.  JMPTBL is a macro defined elsewhere in the file; it
 * receives the target label and the table base.  The handlers themselves
 * are outside this excerpt.
 */
	.p2align 2
L(table_48bytes_fwd_align):
	.int	JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
	.int	JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
3003c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
/*
 * Jump table with 16 entries: entry N (N = 0..15) refers to L(shl_N).
 * Each entry is one 32-bit .int.  JMPTBL is a macro defined elsewhere in
 * the file; it receives the target label and the table base.  The
 * L(shl_N) handlers are outside this excerpt; presumably the index is
 * the low four bits of an address difference -- confirm at the dispatch
 * site.
 */
	.p2align 2
L(shl_table):
	.int	JMPTBL (L(shl_0), L(shl_table))
	.int	JMPTBL (L(shl_1), L(shl_table))
	.int	JMPTBL (L(shl_2), L(shl_table))
	.int	JMPTBL (L(shl_3), L(shl_table))
	.int	JMPTBL (L(shl_4), L(shl_table))
	.int	JMPTBL (L(shl_5), L(shl_table))
	.int	JMPTBL (L(shl_6), L(shl_table))
	.int	JMPTBL (L(shl_7), L(shl_table))
	.int	JMPTBL (L(shl_8), L(shl_table))
	.int	JMPTBL (L(shl_9), L(shl_table))
	.int	JMPTBL (L(shl_10), L(shl_table))
	.int	JMPTBL (L(shl_11), L(shl_table))
	.int	JMPTBL (L(shl_12), L(shl_table))
	.int	JMPTBL (L(shl_13), L(shl_table))
	.int	JMPTBL (L(shl_14), L(shl_table))
	.int	JMPTBL (L(shl_15), L(shl_table))
30228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* L(table_48_bytes_bwd): jump table for the tail of the backward copy.
   It holds 48 four-byte entries; entry N (byte offset 4*N) refers to
   L(bk_write_Nbytes) for N = 0..47.  The backward path dispatches through
   it with BRANCH_TO_JMPTBL_ENTRY, using the remaining byte count in %ecx
   as the index and 4 as the scale, which matches the .int entry size.
   Entries are built by the JMPTBL macro from the target label and the
   label of the table itself; the macro and the L(bk_write_*bytes) targets
   are defined outside this part of the file.  The entry order is the
   dispatch index and must not be changed.  */
3023c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 2
30248ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(table_48_bytes_bwd):
30258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
30268ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
30278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
30288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
30298ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
30308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
30318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
30328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
30338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
30348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
30358ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
30368ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
30378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
30388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
30398ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
30408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
30418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
30428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
30438ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
30448ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
30458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
30468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
30478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
30488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
30498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
30508ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
30518ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
30528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
30538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
30548ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
30558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
30568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
30578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
30588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
30598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
30608ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
30618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
30628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
30638ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
30648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
30658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
30668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
30678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
30688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
30698ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
30708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
30718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
30728ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.int	JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
30738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
30748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	.popsection
30758ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* L(copy_backward): copy %ecx bytes from high addresses to low addresses.
   Assembled only when USE_AS_MEMMOVE is defined.

   Register roles, as established by the code below:
     %eax  on entry, base of the region that is read; it is copied to %edi
           and afterwards reused as scratch for the 1/2/4-byte moves.
           Just before the tail dispatch it holds the base of the bytes
           still to be read.
     %edx  base of the region that is written; moved to one past its last
           byte, then stepped downward.  Just before the tail dispatch it
           holds the base of the bytes still to be written.
     %ecx  number of bytes still to copy.
     %edi  read cursor; the incoming value is saved with PUSH and restored
           with POP before the tail dispatch.
     %xmm0-%xmm3  scratch.

   NOTE(review): the condition under which this path is chosen is decided
   outside this part of the file -- presumably overlapping buffers with
   the destination above the source; confirm at the jump site.  */
30768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#ifdef USE_AS_MEMMOVE
3077c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
30788ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(copy_backward):
3079c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	PUSH (%edi)
	/* Move both cursors one past the last byte:
	   %edi = %eax + %ecx (read side), %edx = %edx + %ecx (write side).  */
3080c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%eax, %edi
30818ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	lea	(%ecx,%edx,1),%edx
3082c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	lea	(%ecx,%edi,1),%edi
	/* If the write cursor is not 4-byte aligned, fix that up first.  */
30838ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	testl	$0x3, %edx
30848ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jnz	L(bk_align)
30858ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* %edx is 4-byte aligned on every path that reaches this label from the
   code shown here.  64 bytes or more go to the 16-byte-aligned bulk
   path.  */
30868ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_aligned_4):
30878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$64, %ecx
30888ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(bk_write_more64bytes)
30898ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Fewer than 64 bytes remain.  */
30908ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_write_64bytesless):
30918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$32, %ecx
30928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(bk_write_less32bytes)
30938ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* 32..63 bytes remain.  Copy the top 32 bytes as four 8-byte moves
   through %xmm0, highest addresses first, then step both cursors down
   by 32.  Afterwards fewer than 32 bytes are left.  */
30948ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_write_more32bytes):
30958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	/* Copy 32 bytes at a time.  */
30968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %ecx
3097c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-8(%edi), %xmm0
3098c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -8(%edx)
3099c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-16(%edi), %xmm0
3100c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -16(%edx)
3101c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-24(%edi), %xmm0
3102c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -24(%edx)
3103c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	-32(%edi), %xmm0
3104c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movq	%xmm0, -32(%edx)
31058ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$32, %edx
3106c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$32, %edi
31078ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* Tail.  Rewind both cursors to the base of the remaining %ecx bytes:
   %eax = %edi - %ecx (read side), %edx = %edx - %ecx (write side).
   Then restore the saved %edi and dispatch on %ecx through
   L(table_48_bytes_bwd).  On every path shown here %ecx is below 32 at
   this point.  */
31088ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_write_less32bytes):
3109c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	%edi, %eax
31108ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	%ecx, %edx
31118ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	%ecx, %eax
3112c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	POP (%edi)
31138ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_write_less32bytes_2):
	/* Arguments: table label, index register, scale.  The scale of 4
	   matches the 4-byte .int table entries.  The macro and the
	   L(bk_write_*bytes) targets are defined outside this part of the
	   file.  */
31148ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
31158ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
	/* NOTE(review): presumably the dispatch above never falls through,
	   and CFI_PUSH only re-declares %edi as saved on the stack for the
	   unwind information of the code below, which runs between the PUSH
	   and the POP.  Confirm against the macro definitions.  */
3116c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	CFI_PUSH (%edi)
3117c47703a521abab120100673d5281f71bc8ba9a49Jack Ren
3118c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Reached with (%edx & 3) != 0.  With 8 bytes or fewer left, skip the
   alignment work and go straight to the tail dispatch.  Otherwise copy
   one byte if %edx is odd, then two bytes if bit 1 is still set, which
   leaves %edx 4-byte aligned.  More than 8 bytes remain here, so taking
   away at most 3 cannot underflow %ecx.  */
31198ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_align):
31208ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$8, %ecx
31218ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jbe	L(bk_write_less32bytes)
31228ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	testl	$1, %edx
31238ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
3124c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	then	(EDX & 2) must be != 0.  */
31258ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jz	L(bk_got2)
	/* %edx is odd: copy one byte through %al.  */
3126c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$1, %edi
31278ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$1, %ecx
31288ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$1, %edx
3129c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzbl	(%edi), %eax
31308ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movb	%al, (%edx)
31318ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
	/* %edx is now even; it is 4-byte aligned unless bit 1 is set.  */
31328ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	testl	$2, %edx
31338ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jz	L(bk_aligned_4)
31348ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* %edx is even with bit 1 set: copy two bytes through %ax.  */
31358ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_got2):
3136c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$2, %edi
31378ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$2, %ecx
31388ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$2, %edx
3139c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movzwl	(%edi), %eax
31408ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movw	%ax, (%edx)
31418ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jmp	L(bk_aligned_4)
31428ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3143c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* At least 64 bytes remain and %edx is 4-byte aligned.  If %edx is
   already 16-byte aligned, go directly to the bulk loop check.  */
31448ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_write_more64bytes):
31458ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	/* Check alignment of last byte.  */
31468ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	testl	$15, %edx
31478ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jz	L(bk_ssse3_cpy_pre)
31488ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
31498ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare/* EDX is aligned 4 bytes, but not 16 bytes.  */
/* Copy 4 bytes at a time through %eax, at most three times, until %edx
   is 16-byte aligned.  No test follows the third step: a 4-byte-aligned
   address that is not 16-byte aligned lies 4, 8 or 12 bytes above a
   16-byte boundary, so three steps always suffice.  */
31508ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_ssse3_align):
3151c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$4, %edi
31528ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$4, %ecx
31538ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$4, %edx
3154c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	(%edi), %eax
31558ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%eax, (%edx)
31568ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
31578ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	testl	$15, %edx
31588ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jz	L(bk_ssse3_cpy_pre)
31598ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3160c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$4, %edi
31618ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$4, %ecx
31628ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$4, %edx
3163c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	(%edi), %eax
31648ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%eax, (%edx)
31658ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
31668ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	testl	$15, %edx
31678ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jz	L(bk_ssse3_cpy_pre)
31688ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3169c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$4, %edi
31708ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$4, %ecx
31718ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$4, %edx
3172c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movl	(%edi), %eax
31738ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movl	%eax, (%edx)
31748ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
/* %edx is 16-byte aligned.  The alignment steps above may have used up
   to 12 bytes, so the count is checked again: with fewer than 64 bytes
   left (at least 52, hence at least 32) use the 32-byte step instead of
   the loop.  */
31758ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_ssse3_cpy_pre):
31768ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$64, %ecx
31778ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jb	L(bk_write_more32bytes)
31788ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
3179c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	.p2align 4
/* Bulk loop: 64 bytes per iteration, moving downward, highest 16-byte
   chunk first.  Loads use movdqu, so nothing is assumed about the
   alignment of %edi.  Stores use movdqa, which relies on %edx being
   16-byte aligned; that holds because %edx is aligned on entry and only
   changes by 64.  Each chunk is stored before the next one is loaded.  */
31808ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareL(bk_ssse3_cpy):
3181c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	sub	$64, %edi
31828ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$64, %ecx
31838ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	sub	$64, %edx
3184c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	0x30(%edi), %xmm3
31858ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm3, 0x30(%edx)
3186c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	0x20(%edi), %xmm2
31878ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm2, 0x20(%edx)
3188c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	0x10(%edi), %xmm1
31898ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm1, 0x10(%edx)
3190c47703a521abab120100673d5281f71bc8ba9a49Jack Ren	movdqu	(%edi), %xmm0
31918ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	movdqa	%xmm0, (%edx)
	/* Repeat while a full 64-byte block remains; otherwise finish with
	   the 32-byte step and/or the tail dispatch.  */
31928ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	cmp	$64, %ecx
31938ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jae	L(bk_ssse3_cpy)
31948ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare	jmp	L(bk_write_64bytesless)
31958ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
31968ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare#endif
31978ff1a2759a6389bed30d7862d0beb76077032c99Bruce Beare
31988ff1a2759a6389bed30d7862d0beb76077032c99Bruce BeareEND (MEMCPY)
3199