/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "cache.h"

/* Name of the exported routine; a wrapper may pre-define it. */
#ifndef MEMSET
# define MEMSET		memset
#endif

/* Local label helper: L(foo) expands to .Lfoo. */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) expands to ".p2align n". */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* ENTRY(name): declare a global function symbol and open its CFI region. */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
name:				\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/*
 * MEMSET: fill a buffer with a byte value using SSE2 stores.
 *
 * Default build:
 *   %rdi = destination, %rsi = fill value (only the low byte is used),
 *   %rdx = byte count.
 * With USE_AS_BZERO_P defined:
 *   %rdi = destination, %rsi = byte count; the fill pattern is zero.
 *
 * Returns the original destination pointer in %rax.
 * Registers written: %rax, %rcx, %rdx, %rsi (default build only),
 * %r8 and %xmm0 (large sizes only), plus the flags.
 *
 * Strategy: every size class below 128 bytes is handled without a loop by
 * storing from the start of the buffer and from its end; the two groups of
 * stores may overlap. Larger buffers write the first and last 64 bytes
 * unaligned and then fill the 64-byte-aligned interior in a loop.
 */
	.section .text.sse2,"ax",@progbits
ENTRY (MEMSET)
	movq	%rdi, %rax		/* return value = destination */
#ifdef USE_AS_BZERO_P
	mov	%rsi, %rdx		/* length is the second argument */
	xor	%rcx, %rcx		/* fill pattern = 0 */
#else
	/* Replicate the fill byte into all eight bytes of %rcx. */
	and	$0xff, %rsi
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx
#endif
	cmpq	$16, %rdx
	jae	L(16bytesormore)
	/* Fewer than 16 bytes: dispatch on the highest set bit of the length. */
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)		/* length is 0 */
	movb	%cl, (%rdi)		/* length is 1 */
L(return):
	ret

	/* Each case below stores once at the start and once ending at dst+len. */
L(8_15bytes):
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
#ifdef USE_AS_BZERO_P
	pxor	%xmm0, %xmm0
#else
	/*
	 * Every byte of %rcx already equals the fill byte, so broadcasting
	 * the low dword fills all 16 bytes of %xmm0 with it.
	 */
	movd	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	/* First and last 16 bytes; done if length <= 32. */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	/* Next 16 bytes from each end; done if length <= 64. */
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	/* Complete the first and last 64 bytes; done if length <= 128. */
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(128bytesmore)
L(32bytesless):
L(64bytesless):
	ret

	ALIGN (4)
L(128bytesmore):
	/*
	 * The first and last 64 bytes are already written. Compute the
	 * 64-byte-aligned interior [%rcx, %rdx) that remains:
	 *   %rcx = (dst + 64) rounded down to a multiple of 64
	 *   %rdx = (dst + len) rounded down to a multiple of 64
	 * %r8 keeps the original length for the cache-size comparison.
	 */
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx
	movq	%rdx, %r8
	addq	%rdi, %rdx
	andq	$-64, %rdx
	cmpq	%rcx, %rdx
	je	L(return)		/* empty interior: nothing left to store */

	/* Lengths above the shared cache size threshold take the movntdq loop. */
#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(128bytesmore_nt)

	ALIGN (4)
L(128bytesmore_normal):
	/* 64 bytes per iteration with aligned stores; %rcx is 64-byte aligned. */
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 0x10(%rcx)
	movaps	%xmm0, 0x20(%rcx)
	movaps	%xmm0, 0x30(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_normal)
	ret

	ALIGN (4)
L(128bytesmore_nt):
	/*
	 * Same loop using non-temporal stores (movntdq).
	 * NOTE(review): presumably chosen to avoid displacing cache contents
	 * when the buffer exceeds the shared cache -- confirm against cache.h.
	 */
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 0x10(%rcx)
	movntdq	%xmm0, 0x20(%rcx)
	movntdq	%xmm0, 0x30(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_nt)
	sfence				/* fence the non-temporal stores before returning */
	ret

END (MEMSET)