/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* NOTE(review): SHARED_CACHE_SIZE_HALF, used by the copy loops, is not
   defined in this file; presumably it comes from cache.h -- confirm.  */
#include "cache.h"

/* Name of the exported routine.  A wrapper may pre-define MEMMOVE to
   build the same body under a different symbol.  */
#ifndef MEMMOVE
# define MEMMOVE	memmove
#endif

/* L(foo) expands to the assembler-local label .Lfoo.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers around the GNU as call-frame-information directives.
   Each one can be overridden by defining it before this point.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): declare NAME as a global function symbol, align it to
   16 bytes, emit its label and open a CFI procedure.  */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name,  @function;		\
	.globl name;		\
	.p2align 4;		\
name:		\
	cfi_startproc
#endif

/* END(name): close the CFI procedure and record the symbol size.  */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/* Stack offsets of the three arguments, used as DEST(%esp) etc. after
   ENTRANCE has run.  PARMS (defined below) is the offset of the first
   argument; the preprocessor expands it lazily, so the late definition
   is fine.  Each following argument is 4 bytes further up.  */
#define DEST		PARMS
#define SRC		DEST+4
#define LEN		SRC+4

/* CFI bookkeeping for a 4-byte push of REG: the CFA moves by 4 and REG
   is recorded as saved at the new top of stack.  */
#define CFI_PUSH(REG)		\
  cfi_adjust_cfa_offset (4);		\
  cfi_rel_offset (REG, 0)

/* CFI bookkeeping for a 4-byte pop of REG: the CFA moves back by 4 and
   REG is marked as holding its original value again.  */
#define CFI_POP(REG)		\
  cfi_adjust_cfa_offset (-4);		\
  cfi_restore (REG)

/* Push/pop a register together with the matching CFI annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument once %ebx has been pushed:
   4 bytes of return address + 4 bytes of saved %ebx.
   ENTRANCE   - function prologue (saves %ebx).
   RETURN_END - restore %ebx and return; for the last return in the
		function.
   RETURN     - same, but re-asserts the "%ebx is pushed" CFI state for
		the code that follows the ret in the file.  */
#define PARMS		8		/* Preserve EBX.  */
#define ENTRANCE	PUSH (%ebx);
#define RETURN_END	POP (%ebx); ret
#define RETURN		RETURN_END; CFI_PUSH (%ebx)
965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.section .text.sse2,"ax",@progbits
/* MEMMOVE entry point and forward dispatch for lengths up to 128 bytes.
   The three arguments are read from the stack at DEST/SRC/LEN(%esp)
   after ENTRANCE has pushed %ebx.  Register roles from here on:
	%ecx = len, %eax = src, %edx = dest.
   Every small-size path below performs all of its loads before its
   first store, so it gives the right result for overlapping buffers
   whichever direction was selected.  */
ENTRY (MEMMOVE)
	ENTRANCE
	movl	LEN(%esp), %ecx
	movl	SRC(%esp), %eax
	movl	DEST(%esp), %edx

/* Check whether we should copy backward or forward.  Addresses are
   unsigned, so the ordering test has to use an unsigned condition: a
   signed jg would choose the wrong direction whenever src and dest lie
   on opposite sides of 0x80000000.  */
	cmp	%eax, %edx
	je	L(mm_return)
	ja	L(mm_len_0_or_more_backward)

/* dest < src: copy forward.  Lengths 0..16, 17..32, 33..64 and 65..128
   are each handled by a dedicated straight-line sequence.  */
	cmp	$16, %ecx
	jbe	L(mm_len_0_16_bytes_forward)

	cmpl	$32, %ecx
	ja	L(mm_len_32_or_more_forward)

/* 17..32 bytes: first 16 and last 16 bytes; the two halves may overlap
   each other.  */
	movdqu	(%eax), %xmm0
	movdqu	-16(%eax, %ecx), %xmm1
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, -16(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_32_or_more_forward):
	cmpl	$64, %ecx
	ja	L(mm_len_64_or_more_forward)

/* 33..64 bytes: first 32 and last 32 bytes.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	-16(%eax, %ecx), %xmm2
	movdqu	-32(%eax, %ecx), %xmm3
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, -16(%edx, %ecx)
	movdqu	%xmm3, -32(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_64_or_more_forward):
	cmpl	$128, %ecx
	ja	L(mm_len_128_or_more_forward)

/* 65..128 bytes: first 64 and last 64 bytes.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqu	-64(%eax, %ecx), %xmm4
	movdqu	-48(%eax, %ecx), %xmm5
	movdqu	-32(%eax, %ecx), %xmm6
	movdqu	-16(%eax, %ecx), %xmm7
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqu	%xmm4, -64(%edx, %ecx)
	movdqu	%xmm5, -48(%edx, %ecx)
	movdqu	%xmm6, -32(%edx, %ecx)
	movdqu	%xmm7, -16(%edx, %ecx)
	jmp	L(mm_return)
1605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Forward copy for len > 128.
   In:   %eax = src, %edx = dest, %ecx = len.
   Uses: %esi/%edi are pushed here (callee-saved); %ebx is used as
	 scratch, it was already saved by ENTRANCE.
   After the set-up: %eax = src - dest, %edi = current 64-byte aligned
   destination position, %ebx = 64-byte aligned end of the bulk region.
   Falls through into the tail code that follows the main loop.  */
L(mm_len_128_or_more_forward):
	PUSH (%esi)
	PUSH (%edi)

/* Aligning the address of destination.  Read the first 64 source bytes
   now; they are stored unaligned at dest further down.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3

/* %edi = first 64-byte boundary strictly above dest.
   %eax = src - dest, so (%eax, %edi) addresses the source byte that
   belongs at destination address %edi.  */
	leal	64(%edx), %edi
	andl	$-64, %edi
	subl	%edx, %eax

/* Read the 64 source bytes for the first aligned block before anything
   is written.  */
	movdqu	(%eax, %edi), %xmm4
	movdqu	16(%eax, %edi), %xmm5
	movdqu	32(%eax, %edi), %xmm6
	movdqu	48(%eax, %edi), %xmm7

/* Unaligned head, then the first aligned 64-byte block.  */
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqa	%xmm4, (%edi)
	movaps	%xmm5, 16(%edi)
	movaps	%xmm6, 32(%edi)
	movaps	%xmm7, 48(%edi)
	addl	$64, %edi

/* %ebx = (dest + len) rounded down to a 64-byte boundary.  If that is
   not beyond %edi there is no further whole block to copy.  */
	leal	(%edx, %ecx), %ebx
	andl	$-64, %ebx
	cmp	%edi, %ebx
	jbe	L(mm_copy_remaining_forward)

/* Lengths of at least SHARED_CACHE_SIZE_HALF use a separate loop.  */
	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
	jae	L(mm_large_page_loop_forward)

/* Main loop: 64 bytes per iteration, unaligned loads from the source,
   aligned stores to the destination, prefetching 128 bytes ahead of the
   current source position.  Runs while %edi < %ebx.  */
	.p2align 4
L(mm_main_loop_forward):

	prefetcht0 128(%eax, %edi)

	movdqu	(%eax, %edi), %xmm0
	movdqu	16(%eax, %edi), %xmm1
	movdqu	32(%eax, %edi), %xmm2
	movdqu	48(%eax, %edi), %xmm3
	movdqa	%xmm0, (%edi)
	movaps	%xmm1, 16(%edi)
	movaps	%xmm2, 32(%edi)
	movaps	%xmm3, 48(%edi)
	leal	64(%edi), %edi
	cmp	%edi, %ebx
	ja	L(mm_main_loop_forward)
2145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Tail of the forward copy.
   In:  %edx = dest, %ecx = len, %edi = first destination byte not yet
	written, %eax = src - dest.
   Every case loads all of its data before storing, and leaves through
   L(mm_return_pop_all).
   NOTE(review): L(mm_return_pop_all) is defined outside this part of
   the file; presumably it pops %edi/%esi before returning -- confirm.  */
L(mm_copy_remaining_forward):
	addl	%edx, %ecx
	subl	%edi, %ecx
/* We copied all up till %edi position in the dst.
	In %ecx now is how many bytes are left to copy.
	%esi is set to the matching position in the source.  */
	leal	(%edi, %eax), %esi

/* Dispatch on the remaining byte count in %ecx.  The .p2align 4,,N
   directives pad to a 16-byte boundary only when at most N bytes of
   padding are needed.  */
L(mm_remaining_0_64_bytes_forward):
	cmp	$32, %ecx
	ja	L(mm_remaining_33_64_bytes_forward)
	cmp	$16, %ecx
	ja	L(mm_remaining_17_32_bytes_forward)
	testl	%ecx, %ecx
	.p2align 4,,2
	je	L(mm_return_pop_all)

	cmpb	$8, %cl
	ja	L(mm_remaining_9_16_bytes_forward)
	cmpb	$4, %cl
	.p2align 4,,5
	ja	L(mm_remaining_5_8_bytes_forward)
	cmpb	$2, %cl
	.p2align 4,,1
	ja	L(mm_remaining_3_4_bytes_forward)
/* 1..2 bytes: last byte and first byte (the same byte when %ecx = 1).  */
	movzbl	-1(%esi,%ecx), %eax
	movzbl	(%esi), %ebx
	movb	%al, -1(%edi,%ecx)
	movb	%bl, (%edi)
	jmp	L(mm_return_pop_all)

/* 33..64 bytes: first 32 and last 32 bytes, possibly overlapping.  */
L(mm_remaining_33_64_bytes_forward):
	movdqu	(%esi), %xmm0
	movdqu	16(%esi), %xmm1
	movdqu	-32(%esi, %ecx), %xmm2
	movdqu	-16(%esi, %ecx), %xmm3
	movdqu	%xmm0, (%edi)
	movdqu	%xmm1, 16(%edi)
	movdqu	%xmm2, -32(%edi, %ecx)
	movdqu	%xmm3, -16(%edi, %ecx)
	jmp	L(mm_return_pop_all)

/* 17..32 bytes: first 16 and last 16 bytes.  */
L(mm_remaining_17_32_bytes_forward):
	movdqu	(%esi), %xmm0
	movdqu	-16(%esi, %ecx), %xmm1
	movdqu	%xmm0, (%edi)
	movdqu	%xmm1, -16(%edi, %ecx)
	jmp	L(mm_return_pop_all)

/* 9..16 bytes: first 8 and last 8 bytes.  */
L(mm_remaining_9_16_bytes_forward):
	movq	(%esi), %xmm0
	movq	-8(%esi, %ecx), %xmm1
	movq	%xmm0, (%edi)
	movq	%xmm1, -8(%edi, %ecx)
	jmp	L(mm_return_pop_all)

/* 5..8 bytes: first 4 and last 4 bytes.  */
L(mm_remaining_5_8_bytes_forward):
	movl	(%esi), %eax
	movl	-4(%esi,%ecx), %ebx
	movl	%eax, (%edi)
	movl	%ebx, -4(%edi,%ecx)
	jmp	L(mm_return_pop_all)

/* 3..4 bytes: last 2 and first 2 bytes.  */
L(mm_remaining_3_4_bytes_forward):
	movzwl	-2(%esi,%ecx), %eax
	movzwl	(%esi), %ebx
	movw	%ax, -2(%edi,%ecx)
	movw	%bx, (%edi)
	jmp	L(mm_return_pop_all)
2845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* The code from here on runs with only %ebx on the stack: the 0..16
   byte path is entered from the dispatch at function entry, before
   %esi/%edi are pushed, and leaves through L(mm_return).  Reset the
   unwind state here, ahead of that path, so the CFI matches the real
   stack layout.  These macros emit no instructions.  */
	CFI_POP (%edi)
	CFI_POP (%esi)

/* Forward copy of 0..16 bytes.
   In: %eax = src, %edx = dest, %ecx = len (<= 16).
   %ebx is used as scratch.  The length class is found by testing bits
   of %cl; each case loads everything before it stores.  */
L(mm_len_0_16_bytes_forward):
	testb	$24, %cl	/* bit 3 or bit 4 set: len is 8..16 */
	jne	L(mm_len_9_16_bytes_forward)
	testb	$4, %cl		/* bit 2 set: len is 4..7 */
	.p2align 4,,5
	jne	L(mm_len_5_8_bytes_forward)
	testl	%ecx, %ecx	/* len == 0: nothing to copy */
	.p2align 4,,2
	je	L(mm_return)
	testb	$2, %cl		/* bit 1 set: len is 2..3 */
	.p2align 4,,1
	jne	L(mm_len_2_4_bytes_forward)
/* len == 1: the two loads read the same byte.  */
	movzbl	-1(%eax,%ecx), %ebx
	movzbl	(%eax), %eax
	movb	%bl, -1(%edx,%ecx)
	movb	%al, (%edx)
	jmp	L(mm_return)

/* Reached with len 2..3: last 2 and first 2 bytes.  */
L(mm_len_2_4_bytes_forward):
	movzwl	-2(%eax,%ecx), %ebx
	movzwl	(%eax), %eax
	movw	%bx, -2(%edx,%ecx)
	movw	%ax, (%edx)
	jmp	L(mm_return)

/* Reached with len 4..7: first 4 and last 4 bytes.  */
L(mm_len_5_8_bytes_forward):
	movl	(%eax), %ebx
	movl	-4(%eax,%ecx), %eax
	movl	%ebx, (%edx)
	movl	%eax, -4(%edx,%ecx)
	jmp	L(mm_return)

/* Reached with len 8..16: first 8 and last 8 bytes.  */
L(mm_len_9_16_bytes_forward):
	movq	(%eax), %xmm0
	movq	-8(%eax, %ecx), %xmm1
	movq	%xmm0, (%edx)
	movq	%xmm1, -8(%edx, %ecx)
	jmp	L(mm_return)

L(mm_recalc_len):
/* Compute in %ecx how many bytes are left to copy after
	the main loop stops: %ecx = %ebx - %edx.  Execution then falls
	through into the code that follows.  */
	movl	%ebx, %ecx
	subl	%edx, %ecx
/* The code for copying backwards.
   In: %eax = src, %edx = dest, %ecx = len.
   Reached from the direction check at function entry and by falling
   through from L(mm_recalc_len).  LEN is an unsigned quantity, so all
   length tests use unsigned conditions (jbe/ja); a signed jg would treat
   a length of 0x80000000 or more as negative and copy only 32 bytes.  */
L(mm_len_0_or_more_backward):

/* Now do checks for lengths. We do [0..16], [17..32], [33..64], [65..128]
	separately.  Each case loads all data before its first store.  */
	cmp	$16, %ecx
	jbe	L(mm_len_0_16_bytes_backward)

	cmpl	$32, %ecx
	ja	L(mm_len_32_or_more_backward)

/* 17..32 bytes: first 16 and last 16 bytes.  */
	movdqu	(%eax), %xmm0
	movdqu	-16(%eax, %ecx), %xmm1
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, -16(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_32_or_more_backward):
	cmpl	$64, %ecx
	ja	L(mm_len_64_or_more_backward)

/* 33..64 bytes: first 32 and last 32 bytes.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	-16(%eax, %ecx), %xmm2
	movdqu	-32(%eax, %ecx), %xmm3
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, -16(%edx, %ecx)
	movdqu	%xmm3, -32(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_64_or_more_backward):
	cmpl	$128, %ecx
	ja	L(mm_len_128_or_more_backward)

/* 65..128 bytes: first 64 and last 64 bytes.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqu	-64(%eax, %ecx), %xmm4
	movdqu	-48(%eax, %ecx), %xmm5
	movdqu	-32(%eax, %ecx), %xmm6
	movdqu	-16(%eax, %ecx), %xmm7
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqu	%xmm4, -64(%edx, %ecx)
	movdqu	%xmm5, -48(%edx, %ecx)
	movdqu	%xmm6, -32(%edx, %ecx)
	movdqu	%xmm7, -16(%edx, %ecx)
	jmp	L(mm_return)
3875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_128_or_more_backward):
/* Backward copy for lengths above 128 bytes.  The last 64 bytes and
   the 64-byte block just below the 64-byte-aligned end of the
   destination are copied first; the loop then moves 64-byte blocks
   downwards with aligned stores until %edi reaches %ebx.
   Registers set up in this block:
	%edi = 64-byte-aligned destination address, moving downwards
	%esi = source - destination, so (%edi, %esi) addresses the
	       source bytes that belong at (%edi)
	%ebx = lowest 64-byte-aligned address strictly above the start
	       of the destination; the loop stops there
   NOTE(review): %ebx is overwritten without a PUSH in this section;
   presumably it is saved at function entry and restored by RETURN
   - confirm.  */
3895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%esi)
3905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%edi)
3915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Aligning the address of destination. We need to save the last
3935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	64 bytes of the source in order not to overwrite them.  */
3945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm0
3955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%eax, %ecx), %xmm1
3965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%eax, %ecx), %xmm2
3975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%eax, %ecx), %xmm3
3985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* %edi = end of the destination rounded down to a multiple of 64.  */
3995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	(%edx, %ecx), %edi
4005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	andl	$-64, %edi
4015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* %esi = source - destination.  */
4025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%eax, %esi
4035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	subl	%edx, %esi
4045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Load the source bytes for the aligned block [%edi - 64, %edi) before
   anything is stored.  */
4055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%edi, %esi), %xmm4
4065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%edi, %esi), %xmm5
4075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%edi, %esi), %xmm6
4085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%edi, %esi), %xmm7
4095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Store the unaligned last 64 bytes, then the aligned block below
   %edi, and step %edi down past that block.  */
4105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, -16(%edx, %ecx)
4115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, -32(%edx, %ecx)
4125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm2, -48(%edx, %ecx)
4135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm3, -64(%edx, %ecx)
4145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm4, -16(%edi)
4155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm5, -32(%edi)
4165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm6, -48(%edi)
4175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm7, -64(%edi)
4185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	-64(%edi), %edi
4195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* %ebx = lower bound for the aligned loop: (destination + 64) rounded
   down to a multiple of 64.  */
4205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	64(%edx), %ebx
4215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	andl	$-64, %ebx
4225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Skip the loop when %ebx >= %edi (unsigned): no whole aligned block
   is left between the two bounds.  */
4235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	%edi, %ebx
424fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jae	L(mm_main_loop_backward_end)
4255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Lengths of at least SHARED_CACHE_SIZE_HALF (defined outside this
   section) use the movntdq-based loop at
   L(mm_large_page_loop_backward) instead of the loop below.  */
426fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
427fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jae	L(mm_large_page_loop_backward)
4285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4
4305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_main_loop_backward):
/* Each iteration copies the 64 bytes that end at %edi and prefetches
   the 64 source bytes below them, i.e. the block the next iteration
   would read.  The four loads come before the four stores.  */
4315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	prefetcht0 -128(%edi, %esi)
4335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%edi, %esi), %xmm0
4355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%edi, %esi), %xmm1
4365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%edi, %esi), %xmm2
4375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%edi, %esi), %xmm3
4385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm0, -64(%edi)
4395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm1, -48(%edi)
4405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm2, -32(%edi)
4415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm3, -16(%edi)
4425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	-64(%edi), %edi
/* Continue while %ebx < %edi (unsigned).  */
4435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	%edi, %ebx
4445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jb	L(mm_main_loop_backward)
445fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_main_loop_backward_end):
/* Restore the registers pushed at the top of this block.
   L(mm_recalc_len) is defined outside this section; going by its name
   it recomputes the length that is still to be copied - not visible
   here.  */
4465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%edi)
4475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
448fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jmp	L(mm_recalc_len)
4495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..16] and return.  */
4515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_16_bytes_backward):
/* Dispatch on the bits of the length in %cl.  Only the low byte is
   tested, so this relies on the length being in [0..16] as stated
   above.  With that assumption:
	bit 3 or bit 4 set (8..16) -> L(mm_len_9_16_bytes_backward)
	bit 2 set (4..7)           -> L(mm_len_5_8_bytes_backward)
	zero                       -> L(mm_return)
	bit 1 set (2..3)           -> L(mm_len_3_4_bytes_backward)
	otherwise (1)              -> single byte copy below
   Note that the label names do not match these ranges exactly.
   The .p2align directives between a test and its jump limit the
   padding via their third argument; see the GNU as manual for the
   exact semantics.  */
4525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$24, %cl
4535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jnz	L(mm_len_9_16_bytes_backward)
4545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$4, %cl
4555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,5
4565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jnz	L(mm_len_5_8_bytes_backward)
4575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testl	%ecx, %ecx
4585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,2
4595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	je	L(mm_return)
4605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$2, %cl
4615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,1
4625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jne	L(mm_len_3_4_bytes_backward)
/* Copy the last and the first byte; for a length of 1 these are the
   same byte.  %eax is overwritten with data, which is fine because
   L(mm_return) reloads it from %edx.  */
4635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	-1(%eax,%ecx), %ebx
4645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	(%eax), %eax
4655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movb	%bl, -1(%edx,%ecx)
4665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movb	%al, (%edx)
4675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
4685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_3_4_bytes_backward):
/* Copy the last two and the first two bytes of the buffer; both words
   are loaded before either is stored.  When reached from
   L(mm_len_0_16_bytes_backward) the length is 2 or 3, so the two
   words overlap or coincide.  %eax is overwritten with data; the
   return path reloads it from %edx.  */
4705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzwl	-2(%eax,%ecx), %ebx
4715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzwl	(%eax), %eax
4725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movw	%bx, -2(%edx,%ecx)
4735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movw	%ax, (%edx)
4745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
4755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_9_16_bytes_backward):
/* Copy the last 8 bytes as two 32-bit words (both loaded before either
   is stored), shrink the length by 8 and go back to the dispatcher to
   handle what is left.  %esi is only a temporary here and is saved
   and restored around its use.  */
4775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%esi)
4785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-4(%eax,%ecx), %ebx
4795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-8(%eax,%ecx), %esi
4805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%ebx, -4(%edx,%ecx)
4815a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%esi, -8(%edx,%ecx)
4825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	subl	$8, %ecx
4835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
4845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_len_0_16_bytes_backward)
4855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_5_8_bytes_backward):
/* Copy the first four and the last four bytes; both words are loaded
   before either is stored.  When reached from
   L(mm_len_0_16_bytes_backward) the length is 4..7, so the two words
   overlap or coincide.  %eax is overwritten with data; execution then
   falls through into L(mm_return), which reloads it from %edx.  */
4875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	(%eax), %ebx
4885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-4(%eax,%ecx), %eax
4895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%ebx, (%edx)
4905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%eax, -4(%edx,%ecx)
4915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return):
/* Common exit: put the destination pointer held in %edx into %eax as
   the result and leave through the RETURN macro (defined outside this
   section).  */
4935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%edx, %eax
4945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	RETURN
4955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return_pop_all):
/* Exit for paths that still have %esi and %edi pushed: set the result
   in %eax from %edx, pop %edi then %esi (the reverse of a
   PUSH (%esi); PUSH (%edi) sequence) and leave through RETURN.  No
   jump to this label is visible in this section.  */
497fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movl	%edx, %eax
4985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%edi)
4995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
5005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	RETURN
5015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy forward part.  */
5035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Copies 64 bytes per iteration, upwards, using movntdq stores.
   Register roles as used by the loop (the setup is outside this
   section):
	%edi = destination cursor, advanced by 64 per iteration
	(%eax, %edi) = matching source address, so %eax presumably
	       holds source - destination here - TODO confirm
	%ebx = upper bound; the loop repeats while %ebx > %edi
	       (unsigned)
   movntdq needs a 16-byte-aligned memory operand, so %edi is assumed
   to be aligned by the code that jumps here - TODO confirm.  The
   sfence after the loop orders the movntdq stores ahead of the stores
   that follow.  L(mm_copy_remaining_forward) is defined outside this
   section.  */
5045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4
5055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_forward):
506fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	(%eax, %edi), %xmm0
507fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	16(%eax, %edi), %xmm1
508fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	32(%eax, %edi), %xmm2
509fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	48(%eax, %edi), %xmm3
510fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm0, (%edi)
511fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm1, 16(%edi)
512fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm2, 32(%edi)
513fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm3, 48(%edi)
514fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	leal	64(%edi), %edi
515fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmp	%edi, %ebx
5165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_large_page_loop_forward)
5175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	sfence
518fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jmp	L(mm_copy_remaining_forward)
5195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy backward part.  */
/* Entered from L(mm_len_128_or_more_backward) when the length is at
   least SHARED_CACHE_SIZE_HALF.  Same shape as
   L(mm_main_loop_backward), but without the prefetch and with movntdq
   stores in place of movdqa:
	%edi = 64-byte-aligned destination cursor, moving downwards
	%esi = source - destination
	%ebx = lower bound; the loop repeats while %ebx < %edi
	       (unsigned)
   The sfence after the loop orders the movntdq stores ahead of the
   stores that follow.  %edi and %esi were pushed by
   L(mm_len_128_or_more_backward) and are restored here before the
   jump to L(mm_recalc_len), which is defined outside this section.  */
5215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4
5225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_backward):
5235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%edi, %esi), %xmm0
5245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%edi, %esi), %xmm1
5255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%edi, %esi), %xmm2
5265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%edi, %esi), %xmm3
5275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm0, -64(%edi)
5285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm1, -48(%edi)
5295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm2, -32(%edi)
5305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm3, -16(%edi)
5315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	-64(%edi), %edi
5325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	%edi, %ebx
5335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jb	L(mm_large_page_loop_backward)
534fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	sfence
5355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%edi)
5365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
537fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jmp	L(mm_recalc_len)
5385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikEND (MEMMOVE)
540