/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* NOTE(review): cache.h is presumably where SHARED_CACHE_SIZE comes from
   (the routine below tests for it with #ifdef) -- confirm against the
   header.  */
#include "cache.h"

/* Symbol name of the routine emitted by this file.  It can be predefined
   before this point to emit the same body under another name.  */
#ifndef MEMSET
# define MEMSET		memset
#endif

/* L(label) expands to .L<label>; the .L prefix keeps the label local to
   this object file (it is not exported).  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) pads to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers around the assembler's call-frame-information
   directives, overridable by defining them earlier.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

/* ENTRY(name): mark `name' as a global symbol of type function, define
   its label, and open a CFI region.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function;	\
	.globl name;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region opened by ENTRY and record the symbol
   size as the distance from `name' to the current location.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/*
 * MEMSET -- fill a buffer with a byte value (x86-64, SSE2, AT&T syntax).
 *
 * Registers read on entry (System V AMD64 argument order):
 *   default build:   %rdi = dst, low byte of %rsi = fill value, %rdx = n
 *   USE_AS_BZERO_P:  %rdi = dst, %rsi = n  (fill value is zero)
 * Returns:   %rax = dst.
 * Clobbers:  %rcx, %rdx, %xmm0, flags; %rsi in the default build;
 *            %r8 when n > 128.  No stack is used.
 *
 * Strategy by length n:
 *   n < 16        one byte store, or two (possibly overlapping) 2/4/8-byte
 *                 stores anchored at the start and at the end of the buffer
 *   16 .. 128     pairs of unaligned 16-byte stores anchored at the start
 *                 and at the end; overlap in the middle is harmless
 *   n > 128       first and last 64 bytes as above, then a loop of aligned
 *                 64-byte chunks over the interior; the loop uses
 *                 non-temporal stores when n exceeds the shared cache size
 */
	.section .text.sse2,"ax",@progbits
ENTRY (MEMSET)
	movq	%rdi, %rax		/* return value = dst */
#ifdef USE_AS_BZERO_P
	mov	%rsi, %rdx		/* bzero variant: length arrives in %rsi */
	xorl	%ecx, %ecx		/* pattern = 0; 32-bit xor clears all of %rcx */
#else
	and	$0xff, %rsi		/* keep only the low byte of the fill value */
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx		/* %rcx = fill byte replicated 8 times */
#endif
	cmpq	$16, %rdx
	jae	L(16bytesormore)	/* unsigned compare: n is a size */

	/* n < 16 here, so only bits 0..3 of %dl can be set.  The highest set
	   bit selects the store width.  */
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)		/* n == 0: nothing to store */
	movb	%cl, (%rdi)		/* n == 1 */
L(return):
	ret

	/* Each small case stores once at dst and once ending exactly at
	   dst + n; the two stores overlap unless n is twice the width.  */
L(8_15bytes):
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
	/* Build the 16-byte pattern in %xmm0.  */
#ifdef USE_AS_BZERO_P
	pxor	%xmm0, %xmm0
#else
	movd	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0	/* broadcast the low dword to all 4 lanes */
#endif
	/* 16 <= n: first and last 16 bytes.  */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	/* 32 < n: extend to the first and last 32 bytes.  */
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	/* 64 < n: extend to the first and last 64 bytes.  */
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(128bytesmore)
	/* n <= 128: the head and tail stores above already cover everything. */
L(32bytesless):
L(64bytesless):
	ret

	ALIGN (4)
L(128bytesmore):
	/* The first and last 64 bytes are already written.  Fill the interior
	   [%rcx, %rdx) in 64-byte aligned chunks:
	     %rcx = first 64-byte boundary strictly above dst
	     %rdx = last 64-byte boundary at or below dst + n
	     %r8  = original n, kept for the cache-size comparison  */
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx
	movq	%rdx, %r8
	addq	%rdi, %rdx
	andq	$-64, %rdx
	cmpq	%rcx, %rdx
	je	L(return)		/* empty interior: nothing left to do */

	/* Fills larger than the shared cache take the non-temporal loop.  */
#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(128bytesmore_nt)

	ALIGN (4)
L(128bytesmore_normal):
	/* %rcx is 64-byte aligned, so the aligned store forms are valid.  */
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 0x10(%rcx)
	movaps	%xmm0, 0x20(%rcx)
	movaps	%xmm0, 0x30(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_normal)
	ret

	ALIGN (4)
L(128bytesmore_nt):
	/* Same loop with non-temporal stores (movntdq).  */
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 0x10(%rcx)
	movntdq	%xmm0, 0x20(%rcx)
	movntdq	%xmm0, 0x30(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_nt)
	sfence				/* order the non-temporal stores before returning */
	ret

END (MEMSET)