15a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/*
25a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikCopyright (c) 2014, Intel Corporation
35a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikAll rights reserved.
45a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
55a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikRedistribution and use in source and binary forms, with or without
65a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchikmodification, are permitted provided that the following conditions are met:
75a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
85a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * Redistributions of source code must retain the above copyright notice,
95a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * this list of conditions and the following disclaimer.
105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * Redistributions in binary form must reproduce the above copyright notice,
125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * this list of conditions and the following disclaimer in the documentation
135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * and/or other materials provided with the distribution.
145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * Neither the name of Intel Corporation nor the names of its contributors
165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * may be used to endorse or promote products derived from this software
175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik    * without specific prior written permission.
185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik*/
305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#include "cache.h"
325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Symbol name given to the routine defined below with ENTRY (MEMMOVE).
   Falls back to memmove unless MEMMOVE was already defined before this
   point.  */
335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef MEMMOVE
345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define MEMMOVE	memmove
355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* L(label) pastes .L in front of label; every branch target in this file
   is written through this macro.  Only defined here if no earlier
   definition exists.  */
375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef L
385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define L(label)	.L##label
395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Fallback wrappers for the call-frame-information directives.  Each macro
   expands to the assembler directive of the same name with a leading dot
   and is only defined when no earlier definition exists.  */
415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_startproc
425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_startproc	.cfi_startproc
435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_endproc
465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_endproc	.cfi_endproc
475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Record that reg is saved at offset off from the current stack pointer
   (argument order: register, offset).  */
495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_rel_offset
505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_restore
545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_restore(reg)	.cfi_restore reg
555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* off is a signed byte count; this file passes 4 after a push and -4 after
   a pop (see CFI_PUSH and CFI_POP).  */
575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_adjust_cfa_offset
585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* ENTRY (name) opens a function: it marks name as a function symbol, makes
   it global, aligns to a 16-byte boundary (.p2align 4), emits the label and
   starts the CFI region.  Only defined here if no earlier definition
   exists.  */
615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef ENTRY
625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define ENTRY(name)		\
635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.type name,  @function;		\
645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.globl name;		\
655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4;		\
665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchikname:		\
675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cfi_startproc
685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* END (name) closes a function opened with ENTRY: it ends the CFI region
   and sets the symbol size to the distance from name to the current
   location.  Only defined here if no earlier definition exists.  */
705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef END
715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define END(name)		\
725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cfi_endproc;		\
735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.size name, .-name
745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Offsets of the three arguments, used as DEST(%esp), SRC(%esp) and
   LEN(%esp) right after ENTRANCE.  Each argument slot is 4 bytes wide and
   the first slot is at PARMS.  PARMS is defined further down; that is fine
   because these macros are only expanded at their point of use.
   With USE_AS_BCOPY the first slot holds the source and the second the
   destination; otherwise the destination comes first.  The length is the
   third slot in both cases.  */
765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifdef USE_AS_BCOPY
77fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define SRC		PARMS
78fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define DEST		SRC+4
79fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define LEN		DEST+4
805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#else
81fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define DEST		PARMS
82fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define SRC		DEST+4
83fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define LEN		SRC+4
845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif
855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Unwind annotation for a 4-byte push of REG: the CFA offset grows by 4 and
   REG is recorded as saved at offset 0 from the stack pointer.  Emits no
   instruction by itself.  */
865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define CFI_PUSH(REG)		\
875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik  cfi_adjust_cfa_offset (4);		\
885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik  cfi_rel_offset (REG, 0)
895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* Unwind annotation for the matching 4-byte pop: the CFA offset shrinks by
   4 and REG is marked as restored.  Emits no instruction by itself.  */
905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define CFI_POP(REG)		\
915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik  cfi_adjust_cfa_offset (-4);		\
925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik  cfi_restore (REG)
935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* PUSH and POP emit the real pushl/popl instruction followed by the
   matching CFI annotation, keeping stack contents and unwind information
   in step.  */
945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define POP(REG)	popl REG; CFI_POP (REG)
965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
/* PARMS is the %esp-relative offset of the first argument once ENTRANCE has
   run.  NOTE(review): the value 8 presumably covers the 4-byte return
   address plus the 4-byte saved %ebx pushed by ENTRANCE -- confirm.
   ENTRANCE   saves %ebx on the stack.
   RETURN_END restores %ebx and returns.
   RETURN     is RETURN_END followed by CFI_PUSH (%ebx), which emits no
              instruction; presumably it re-declares %ebx as pushed for the
              code assembled after the ret -- confirm.  */
975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define PARMS		8		/* Preserve EBX.  */
985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define ENTRANCE	PUSH (%ebx);
995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define RETURN_END	POP (%ebx); ret
1005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define RETURN		RETURN_END; CFI_PUSH (%ebx)
1015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.section .text.sse2,"ax",@progbits
1035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikENTRY (MEMMOVE)
1045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ENTRANCE
1055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	LEN(%esp), %ecx
1065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	SRC(%esp), %eax
1075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	DEST(%esp), %edx
1085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Check whether we should copy backward or forward.  */
1105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	%eax, %edx
1115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	je	L(mm_return)
112fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jg	L(mm_len_0_or_more_backward)
1135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Now check the length. The ranges [0..16], [17..32], [33..64] and [65..128]
1155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	are handled separately.  */
1165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	$16, %ecx
1175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jbe	L(mm_len_0_16_bytes_forward)
1185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
119fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmpl	$32, %ecx
120fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	ja	L(mm_len_32_or_more_forward)
1215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..32] and return.  */
1235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%eax), %xmm0
1245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm1
1255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, (%edx)
1265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, -16(%edx, %ecx)
1275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
1285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_32_or_more_forward):
130fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmpl	$64, %ecx
131fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	ja	L(mm_len_64_or_more_forward)
1325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..64] and return.  */
1345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%eax), %xmm0
1355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	16(%eax), %xmm1
1365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm2
1375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%eax, %ecx), %xmm3
1385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, (%edx)
1395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, 16(%edx)
1405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm2, -16(%edx, %ecx)
1415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm3, -32(%edx, %ecx)
1425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
1435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_64_or_more_forward):
145fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmpl	$128, %ecx
146fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	ja	L(mm_len_128_or_more_forward)
1475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..128] and return.  */
1495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%eax), %xmm0
1505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	16(%eax), %xmm1
1515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	32(%eax), %xmm2
1525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	48(%eax), %xmm3
1535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%eax, %ecx), %xmm4
1545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%eax, %ecx), %xmm5
1555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%eax, %ecx), %xmm6
1565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm7
1575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, (%edx)
1585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, 16(%edx)
1595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm2, 32(%edx)
1605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm3, 48(%edx)
1615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm4, -64(%edx, %ecx)
1625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm5, -48(%edx, %ecx)
1635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm6, -32(%edx, %ecx)
1645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm7, -16(%edx, %ecx)
1655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
1665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_128_or_more_forward):
1685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%esi)
1695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%edi)
1705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
1715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Aligning the address of destination.  */
172fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	(%eax), %xmm0
173fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	16(%eax), %xmm1
174fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	32(%eax), %xmm2
175fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	48(%eax), %xmm3
1765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
177fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	leal	64(%edx), %edi
178fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	andl	$-64, %edi
179fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	subl	%edx, %eax
1805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
181fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	(%eax, %edi), %xmm4
182fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	16(%eax, %edi), %xmm5
183fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	32(%eax, %edi), %xmm6
184fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	48(%eax, %edi), %xmm7
1855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
186fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm0, (%edx)
187fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm1, 16(%edx)
188fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm2, 32(%edx)
189fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm3, 48(%edx)
190fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqa	%xmm4, (%edi)
191fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movaps	%xmm5, 16(%edi)
192fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movaps	%xmm6, 32(%edi)
193fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movaps	%xmm7, 48(%edi)
194fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	addl	$64, %edi
1955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
196fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	leal	(%edx, %ecx), %ebx
1975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	andl	$-64, %ebx
198fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmp	%edi, %ebx
1995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jbe	L(mm_copy_remaining_forward)
2005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
201fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
202fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jae	L(mm_large_page_loop_forward)
203fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik
2045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4
2055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_main_loop_forward):
2065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
207fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	prefetcht0 128(%eax, %edi)
208fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik
209fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	(%eax, %edi), %xmm0
210fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	16(%eax, %edi), %xmm1
211fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	32(%eax, %edi), %xmm2
212fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	48(%eax, %edi), %xmm3
213fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqa	%xmm0, (%edi)
214fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movaps	%xmm1, 16(%edi)
215fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movaps	%xmm2, 32(%edi)
216fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movaps	%xmm3, 48(%edi)
217fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	leal	64(%edi), %edi
218fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmp	%edi, %ebx
2195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_main_loop_forward)
2205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
2215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_copy_remaining_forward):
222fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	addl	%edx, %ecx
223fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	subl	%edi, %ecx
224fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* Everything up to the %edi position in the destination has been copied.
2255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	%ecx now holds the number of bytes left to copy.
2265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	Set %esi to the matching position in the source. */
227fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	leal	(%edi, %eax), %esi
2285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
2295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_0_64_bytes_forward):
2305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	$32, %ecx
2315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_remaining_33_64_bytes_forward)
2325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	$16, %ecx
2335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_remaining_17_32_bytes_forward)
2345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testl	%ecx, %ecx
2355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,2
2365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	je	L(mm_return_pop_all)
2375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
2385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmpb	$8, %cl
2395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_remaining_9_16_bytes_forward)
2405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmpb	$4, %cl
2415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,5
2425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_remaining_5_8_bytes_forward)
2435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmpb	$2, %cl
2445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,1
2455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_remaining_3_4_bytes_forward)
2465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	-1(%esi,%ecx), %eax
2475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	(%esi), %ebx
248fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movb	%al, -1(%edi,%ecx)
249fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movb	%bl, (%edi)
2505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return_pop_all)
2515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
2525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_33_64_bytes_forward):
2535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%esi), %xmm0
2545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	16(%esi), %xmm1
2555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%esi, %ecx), %xmm2
2565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%esi, %ecx), %xmm3
257fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm0, (%edi)
258fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm1, 16(%edi)
259fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm2, -32(%edi, %ecx)
260fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm3, -16(%edi, %ecx)
2615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return_pop_all)
2625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
2635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_17_32_bytes_forward):
2645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%esi), %xmm0
2655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%esi, %ecx), %xmm1
266fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm0, (%edi)
267fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	%xmm1, -16(%edi, %ecx)
2685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return_pop_all)
2695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
270fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_remaining_9_16_bytes_forward):
271fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movq	(%esi), %xmm0
272fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movq	-8(%esi, %ecx), %xmm1
273fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movq	%xmm0, (%edi)
274fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movq	%xmm1, -8(%edi, %ecx)
2755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return_pop_all)
2765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
2775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_5_8_bytes_forward):
2785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	(%esi), %eax
2795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-4(%esi,%ecx), %ebx
280fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movl	%eax, (%edi)
281fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movl	%ebx, -4(%edi,%ecx)
2825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return_pop_all)
2835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
284fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_remaining_3_4_bytes_forward):
285fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movzwl	-2(%esi,%ecx), %eax
286fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movzwl	(%esi), %ebx
287fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movw	%ax, -2(%edi,%ecx)
288fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movw	%bx, (%edi)
2895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return_pop_all)
2905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
2915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_16_bytes_forward):
2925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$24, %cl
2935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jne	L(mm_len_9_16_bytes_forward)
2945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$4, %cl
2955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,5
2965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jne	L(mm_len_5_8_bytes_forward)
2975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testl	%ecx, %ecx
2985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,2
2995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	je	L(mm_return)
3005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$2, %cl
3015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,1
3025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jne	L(mm_len_2_4_bytes_forward)
3035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	-1(%eax,%ecx), %ebx
3045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	(%eax), %eax
3055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movb	%bl, -1(%edx,%ecx)
3065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movb	%al, (%edx)
3075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
3085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_2_4_bytes_forward):
3105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzwl	-2(%eax,%ecx), %ebx
3115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzwl	(%eax), %eax
3125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movw	%bx, -2(%edx,%ecx)
3135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movw	%ax, (%edx)
3145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
3155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_5_8_bytes_forward):
3175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	(%eax), %ebx
3185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-4(%eax,%ecx), %eax
3195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%ebx, (%edx)
3205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%eax, -4(%edx,%ecx)
3215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
3225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_9_16_bytes_forward):
3245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movq	(%eax), %xmm0
3255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movq	-8(%eax, %ecx), %xmm1
3265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movq	%xmm0, (%edx)
3275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movq	%xmm1, -8(%edx, %ecx)
3285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
3295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
33008d6edf52249e34942d8ed2af6c35b1e2980bc6dChristopher Ferris	CFI_POP (%edi)
33108d6edf52249e34942d8ed2af6c35b1e2980bc6dChristopher Ferris	CFI_POP (%esi)
33208d6edf52249e34942d8ed2af6c35b1e2980bc6dChristopher Ferris
333fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_recalc_len):
334fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* Compute in %ecx how many bytes are left to copy after
335fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	the main loop stops.  */
336fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movl	%ebx, %ecx
337fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	subl	%edx, %ecx
3385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* The code for copying backwards.  */
3395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_or_more_backward):
3405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
341fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* Now check the length. The ranges [0..16], [17..32], [33..64] and [65..128]
3425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	are handled separately.  */
3435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	$16, %ecx
3445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jbe	L(mm_len_0_16_bytes_backward)
3455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
346fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmpl	$32, %ecx
3475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jg	L(mm_len_32_or_more_backward)
3485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..32] and return.  */
3505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%eax), %xmm0
3515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm1
3525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, (%edx)
3535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, -16(%edx, %ecx)
3545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
3555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_32_or_more_backward):
357fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmpl	$64, %ecx
3585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jg	L(mm_len_64_or_more_backward)
3595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..64] and return.  */
3615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%eax), %xmm0
3625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	16(%eax), %xmm1
3635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm2
3645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%eax, %ecx), %xmm3
3655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, (%edx)
3665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, 16(%edx)
3675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm2, -16(%edx, %ecx)
3685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm3, -32(%edx, %ecx)
3695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
3705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_64_or_more_backward):	/* Backward path: lengths above 128 go to the bulk copy, the rest is copied right here.  */
372fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmpl	$128, %ecx
3735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_len_128_or_more_backward)	/* unsigned branch: %ecx is a byte count, so a length of 2 GiB or more must not be treated as negative */
3745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [64..128] and return.  The length is at most 128 here (and presumably at least 64; the dispatch is outside this view).  As used below: %eax = source, %edx = destination, %ecx = length.  */
3765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	(%eax), %xmm0	/* xmm0..xmm3 = first 64 bytes of the source */
3775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	16(%eax), %xmm1
3785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	32(%eax), %xmm2
3795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	48(%eax), %xmm3
3805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%eax, %ecx), %xmm4	/* xmm4..xmm7 = last 64 bytes of the source; they overlap the first 64 when the length is below 128 */
3815a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%eax, %ecx), %xmm5
3825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%eax, %ecx), %xmm6
3835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm7
3845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, (%edx)	/* every load is done before the first store, so overlapping buffers are copied correctly */
3855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, 16(%edx)
3865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm2, 32(%edx)
3875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm3, 48(%edx)
3885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm4, -64(%edx, %ecx)
3895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm5, -48(%edx, %ecx)
3905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm6, -32(%edx, %ecx)
3915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm7, -16(%edx, %ecx)
3925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
3935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_128_or_more_backward):	/* Backward bulk copy for lengths above 128 bytes: copy the tail, then walk down in aligned 64-byte chunks.  */
3955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%esi)	/* %esi and %edi are used as scratch below and are popped again at the loop exits */
3965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%edi)
3975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
3985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Aligning the end address of the destination to 64 bytes.  We need to save the last
3995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	64 bytes of the source (four 16-byte registers) first in order not to overwrite them.  */
4005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%eax, %ecx), %xmm0	/* xmm0..xmm3 = last 64 bytes of the source */
4015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%eax, %ecx), %xmm1
4025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%eax, %ecx), %xmm2
4035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%eax, %ecx), %xmm3
4045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	(%edx, %ecx), %edi	/* %edi = one past the last destination byte ... */
4065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	andl	$-64, %edi	/* ... rounded down to a 64-byte boundary */
4075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%eax, %esi
4095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	subl	%edx, %esi	/* %esi = source - destination, so (%edi, %esi) is the source byte belonging to destination address %edi */
4105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%edi, %esi), %xmm4	/* xmm4..xmm7 = source bytes for the 64 destination bytes just below the aligned end */
4125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%edi, %esi), %xmm5
4135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%edi, %esi), %xmm6
4145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%edi, %esi), %xmm7
4155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm0, -16(%edx, %ecx)	/* all eight loads are done; store the (unaligned) last 64 bytes of the destination */
4175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm1, -32(%edx, %ecx)
4185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm2, -48(%edx, %ecx)
4195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	%xmm3, -64(%edx, %ecx)
4205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm4, -16(%edi)	/* aligned stores: %edi is a multiple of 64 */
4215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm5, -32(%edi)
4225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm6, -48(%edi)
4235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm7, -64(%edi)
4245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	-64(%edi), %edi	/* %edi = lowest destination address written so far by the aligned stores */
4255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	64(%edx), %ebx
4275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	andl	$-64, %ebx	/* %ebx = lowest 64-byte boundary strictly above %edx; the loops stop when %edi gets down to it */
4285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	%edi, %ebx
430fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jae	L(mm_main_loop_backward_end)	/* %ebx >= %edi (unsigned): no whole chunk left between them, skip the loops */
4315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
432fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
433fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jae	L(mm_large_page_loop_backward)	/* lengths of at least SHARED_CACHE_SIZE_HALF use the non-temporal loop; otherwise fall through to L(mm_main_loop_backward) */
4345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4	/* align the loop entry to 16 bytes */
4365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_main_loop_backward):	/* Each iteration copies the 64 bytes just below %edi; %esi = source - destination, %ebx = lower bound.  */
4375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	prefetcht0 -128(%edi, %esi)	/* prefetch the source chunk that the next iteration will read */
4395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%edi, %esi), %xmm0	/* source may be unaligned: unaligned loads */
4415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%edi, %esi), %xmm1
4425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%edi, %esi), %xmm2
4435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%edi, %esi), %xmm3
4445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm0, -64(%edi)	/* %edi was aligned to 64 before the loop and only moves in steps of 64: aligned stores */
4455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm1, -48(%edi)
4465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm2, -32(%edi)
4475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqa	%xmm3, -16(%edi)
4485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	-64(%edi), %edi
4495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	%edi, %ebx
4505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jb	L(mm_main_loop_backward)	/* keep going while %ebx < %edi (unsigned) */
451fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_main_loop_backward_end):	/* Exit, also reached directly when there is nothing to loop over: undo the two PUSHes of L(mm_len_128_or_more_backward).  */
4525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%edi)
4535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
454fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jmp	L(mm_recalc_len)	/* label is outside this view; presumably it copies the bytes that remain below %ebx - TODO confirm */
4555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..16] and return.  Dispatches on the bits of the length in %cl (only the low byte is tested, so the length is assumed to be at most 16 here); %eax and %ebx are used as scratch.  */
4575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_16_bytes_backward):
4585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$24, %cl	/* bit 3 or bit 4 set: at least 8 bytes */
4595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jnz	L(mm_len_9_16_bytes_backward)
4605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$4, %cl	/* bit 2 set: 4..7 bytes */
4615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,5	/* pad to a 16-byte boundary only if that takes at most 5 bytes */
4625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jnz	L(mm_len_5_8_bytes_backward)
4635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testl	%ecx, %ecx	/* length 0: nothing to copy */
4645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,2
4655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	je	L(mm_return)
4665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	testb	$2, %cl	/* bit 1 set: 2..3 bytes */
4675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4,,1
4685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jne	L(mm_len_3_4_bytes_backward)
4695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	-1(%eax,%ecx), %ebx	/* length 1: the last byte and the first byte are the same byte */
4705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzbl	(%eax), %eax	/* overwrites the source pointer, which is not read again on this path */
4715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movb	%bl, -1(%edx,%ecx)
4725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movb	%al, (%edx)
4735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
4745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_3_4_bytes_backward):	/* Reached from L(mm_len_0_16_bytes_backward) when bit 1 of the length is set and bits 2..4 are clear, i.e. for 2 or 3 bytes.  */
4765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzwl	-2(%eax,%ecx), %ebx	/* last 2 bytes of the source */
4775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movzwl	(%eax), %eax	/* first 2 bytes; overlaps the last 2 for these lengths, and both loads precede the stores */
4785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movw	%bx, -2(%edx,%ecx)
4795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movw	%ax, (%edx)
4805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_return)
4815a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_9_16_bytes_backward):	/* Reached when bit 3 or bit 4 of the length is set: copy the last 8 bytes, then dispatch again on the remaining length.  */
4835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	PUSH (%esi)	/* second scratch register next to %ebx */
4845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-4(%eax,%ecx), %ebx
4855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-8(%eax,%ecx), %esi
4865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%ebx, -4(%edx,%ecx)	/* both loads are done before the first store */
4875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%esi, -8(%edx,%ecx)
4885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	subl	$8, %ecx	/* the last 8 bytes are copied; 0..8 bytes remain at the start */
4895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
4905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jmp	L(mm_len_0_16_bytes_backward)
4915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_5_8_bytes_backward):	/* Reached when bit 2 of the length is set and bits 3..4 are clear: copy the first and the last 4 bytes, which may overlap.  */
4935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	(%eax), %ebx
4945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	-4(%eax,%ecx), %eax	/* overwrites the source pointer, which is not read again on this path */
4955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%ebx, (%edx)	/* both loads are done before the first store */
4965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%eax, -4(%edx,%ecx)	/* execution continues at the L(mm_return) label that follows */
4975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
4985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return):	/* Common exit for paths that have no extra registers pushed.  */
4995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movl	%edx, %eax	/* result = destination pointer */
5005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	RETURN	/* macro defined outside this view */
5015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return_pop_all):	/* Exit for paths that still have %esi and %edi pushed.  */
503fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movl	%edx, %eax	/* result = destination pointer */
5045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%edi)	/* popped in the reverse of the PUSH (%esi), PUSH (%edi) order */
5055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
5065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	RETURN	/* macro defined outside this view */
5075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy forward part: 64 bytes per iteration with non-temporal stores.  */
5095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4	/* align the loop entry to 16 bytes */
5115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_forward):	/* %edi = destination cursor, %ebx = upper bound; %eax is added to %edi to form the source address (presumably source - destination, set up outside this view).  */
512fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	(%eax, %edi), %xmm0
513fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	16(%eax, %edi), %xmm1
514fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	32(%eax, %edi), %xmm2
515fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movdqu	48(%eax, %edi), %xmm3
516fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm0, (%edi)	/* non-temporal store; needs a 16-byte aligned address, so %edi is assumed to be aligned by the code that jumps here */
517fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm1, 16(%edi)
518fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm2, 32(%edi)
519fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	movntdq	%xmm3, 48(%edi)
520fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	leal	64(%edi), %edi
521fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	cmp	%edi, %ebx
5225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	ja	L(mm_large_page_loop_forward)	/* keep going while %ebx > %edi (unsigned) */
5235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	sfence	/* order the non-temporal stores before any later stores */
524fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jmp	L(mm_copy_remaining_forward)	/* label is outside this view */
5255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy backward part: same walk as L(mm_main_loop_backward), but with non-temporal stores and no prefetch.  */
5275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	.p2align 4	/* align the loop entry to 16 bytes */
5285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_backward):	/* %edi = 64-byte aligned destination cursor moving down, %esi = source - destination, %ebx = lower bound.  */
5295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-64(%edi, %esi), %xmm0
5305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-48(%edi, %esi), %xmm1
5315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-32(%edi, %esi), %xmm2
5325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movdqu	-16(%edi, %esi), %xmm3
5335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm0, -64(%edi)	/* non-temporal store to an aligned address */
5345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm1, -48(%edi)
5355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm2, -32(%edi)
5365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	movntdq	%xmm3, -16(%edi)
5375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	leal	-64(%edi), %edi
5385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	cmp	%edi, %ebx
5395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	jb	L(mm_large_page_loop_backward)	/* keep going while %ebx < %edi (unsigned) */
540fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	sfence	/* order the non-temporal stores before any later stores */
5415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%edi)	/* undo the two PUSHes of L(mm_len_128_or_more_backward) */
5425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik	POP (%esi)
543fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik	jmp	L(mm_recalc_len)	/* label is outside this view; presumably it copies the bytes that remain below %ebx - TODO confirm */
5445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik
5455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikEND (MEMMOVE)
546