15a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* 25a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikCopyright (c) 2014, Intel Corporation 35a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikAll rights reserved. 45a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 55a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikRedistribution and use in source and binary forms, with or without 65a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchikmodification, are permitted provided that the following conditions are met: 75a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 85a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * Redistributions of source code must retain the above copyright notice, 95a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * this list of conditions and the following disclaimer. 105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * Redistributions in binary form must reproduce the above copyright notice, 125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * this list of conditions and the following disclaimer in the documentation 135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * and/or other materials provided with the distribution. 145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * Neither the name of Intel Corporation nor the names of its contributors 165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * may be used to endorse or promote products derived from this software 175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik * without specific prior written permission. 185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik*/ 305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#include "cache.h" 325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef MEMMOVE 345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define MEMMOVE memmove 355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef L 385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define L(label) .L##label 395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_startproc 425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_startproc .cfi_startproc 435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_endproc 465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_endproc .cfi_endproc 475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_rel_offset 505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off 515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_restore 545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_restore(reg) .cfi_restore reg 555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef cfi_adjust_cfa_offset 585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off 595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef ENTRY 625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define ENTRY(name) \ 635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .type name, @function; \ 645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .globl name; \ 655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4; \ 665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchikname: \ 675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_startproc 685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifndef END 715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik# define END(name) \ 725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_endproc; \ 735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .size name, .-name 745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#ifdef USE_AS_BCOPY 77fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define SRC PARMS 78fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define DEST SRC+4 79fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define LEN DEST+4 805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#else 81fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define DEST PARMS 82fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define SRC DEST+4 83fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik# define LEN SRC+4 845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#endif 855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define CFI_PUSH(REG) \ 875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_adjust_cfa_offset (4); \ 885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_rel_offset (REG, 0) 895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define CFI_POP(REG) \ 915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_adjust_cfa_offset (-4); \ 925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_restore (REG) 935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define PUSH(REG) pushl REG; CFI_PUSH (REG) 955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define POP(REG) popl REG; CFI_POP (REG) 965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define PARMS 8 /* Preserve EBX. */ 985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define ENTRANCE PUSH (%ebx); 995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define RETURN_END POP (%ebx); ret 1005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define RETURN RETURN_END; CFI_PUSH (%ebx) 1015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .section .text.sse2,"ax",@progbits 1035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikENTRY (MEMMOVE) 1045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ENTRANCE 1055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl LEN(%esp), %ecx 1065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl SRC(%esp), %eax 1075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl DEST(%esp), %edx 1085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Check whether we should copy backward or forward. */ 1105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %eax, %edx 1115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return) 112fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jg L(mm_len_0_or_more_backward) 1135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128] 1155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik separately. */ 1165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $16, %ecx 1175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jbe L(mm_len_0_16_bytes_forward) 1185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 119fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $32, %ecx 120fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik ja L(mm_len_32_or_more_forward) 1215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..32] and return. */ 1235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 1245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm1 1255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 1265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, -16(%edx, %ecx) 1275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 1285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_32_or_more_forward): 130fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $64, %ecx 131fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik ja L(mm_len_64_or_more_forward) 1325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..64] and return. */ 1345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 1355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 1365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm2 1375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm3 1385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 1395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 1405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, -16(%edx, %ecx) 1415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, -32(%edx, %ecx) 1425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 1435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_64_or_more_forward): 145fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $128, %ecx 146fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik ja L(mm_len_128_or_more_forward) 1475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..128] and return. */ 1495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 1505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 1515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 32(%eax), %xmm2 1525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 48(%eax), %xmm3 1535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%eax, %ecx), %xmm4 1545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%eax, %ecx), %xmm5 1555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm6 1565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm7 1575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 1585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 1595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, 32(%edx) 1605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, 48(%edx) 1615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm4, -64(%edx, %ecx) 1625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm5, -48(%edx, %ecx) 1635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm6, -32(%edx, %ecx) 1645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm7, -16(%edx, %ecx) 1655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 1665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_128_or_more_forward): 1685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%esi) 1695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%edi) 1705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Aligning the address of destination. */ 172fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax), %xmm0 173fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax), %xmm1 174fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax), %xmm2 175fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax), %xmm3 1765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 177fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal 64(%edx), %edi 178fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik andl $-64, %edi 179fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik subl %edx, %eax 1805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 181fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax, %edi), %xmm4 182fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax, %edi), %xmm5 183fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax, %edi), %xmm6 184fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax, %edi), %xmm7 1855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 186fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm0, (%edx) 187fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm1, 16(%edx) 188fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm2, 32(%edx) 189fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm3, 48(%edx) 190fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqa %xmm4, (%edi) 191fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm5, 16(%edi) 192fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm6, 32(%edi) 193fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm7, 48(%edi) 194fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik addl $64, %edi 1955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 196fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal (%edx, %ecx), %ebx 1975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik andl $-64, %ebx 198fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp %edi, %ebx 1995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jbe L(mm_copy_remaining_forward) 2005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 201fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp $SHARED_CACHE_SIZE_HALF, %ecx 202fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jae L(mm_large_page_loop_forward) 203fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik 2045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 2055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_main_loop_forward): 2065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 207fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik prefetcht0 128(%eax, %edi) 208fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik 209fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax, %edi), %xmm0 210fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax, %edi), %xmm1 211fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax, %edi), %xmm2 212fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax, %edi), %xmm3 213fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqa %xmm0, (%edi) 214fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm1, 16(%edi) 215fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm2, 32(%edi) 216fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm3, 48(%edi) 217fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal 64(%edi), %edi 218fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp %edi, %ebx 2195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_main_loop_forward) 2205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_copy_remaining_forward): 222fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik addl %edx, %ecx 223fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik subl %edi, %ecx 224fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* We copied all up till %edi position in the dst. 2255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik In %ecx now is how many bytes are left to copy. 2265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik Now we need to advance %esi. */ 227fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal (%edi, %eax), %esi 2285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_0_64_bytes_forward): 2305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $32, %ecx 2315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_33_64_bytes_forward) 2325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $16, %ecx 2335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_17_32_bytes_forward) 2345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testl %ecx, %ecx 2355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,2 2365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return_pop_all) 2375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmpb $8, %cl 2395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_9_16_bytes_forward) 2405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmpb $4, %cl 2415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,5 2425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_5_8_bytes_forward) 2435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmpb $2, %cl 2445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,1 2455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_3_4_bytes_forward) 2465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl -1(%esi,%ecx), %eax 2475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl (%esi), %ebx 248fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movb %al, -1(%edi,%ecx) 249fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movb %bl, (%edi) 2505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_33_64_bytes_forward): 2535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%esi), %xmm0 2545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%esi), %xmm1 2555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%esi, %ecx), %xmm2 2565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%esi, %ecx), %xmm3 257fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm0, (%edi) 258fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm1, 16(%edi) 259fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm2, -32(%edi, %ecx) 260fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm3, -16(%edi, %ecx) 2615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_17_32_bytes_forward): 2645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%esi), %xmm0 2655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%esi, %ecx), %xmm1 266fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm0, (%edi) 267fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm1, -16(%edi, %ecx) 2685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 270fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_remaining_9_16_bytes_forward): 271fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq (%esi), %xmm0 272fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq -8(%esi, %ecx), %xmm1 273fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq %xmm0, (%edi) 274fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq %xmm1, -8(%edi, %ecx) 2755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_5_8_bytes_forward): 2785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl (%esi), %eax 2795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%esi,%ecx), %ebx 280fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %eax, (%edi) 281fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %ebx, -4(%edi,%ecx) 2825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 284fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_remaining_3_4_bytes_forward): 285fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movzwl -2(%esi,%ecx), %eax 286fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movzwl (%esi), %ebx 287fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movw %ax, -2(%edi,%ecx) 288fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movw %bx, (%edi) 2895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_16_bytes_forward): 2925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $24, %cl 2935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne L(mm_len_9_16_bytes_forward) 2945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $4, %cl 2955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,5 2965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne L(mm_len_5_8_bytes_forward) 2975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testl %ecx, %ecx 2985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,2 2995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return) 3005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $2, %cl 3015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,1 3025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne L(mm_len_2_4_bytes_forward) 3035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl -1(%eax,%ecx), %ebx 3045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl (%eax), %eax 3055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb %bl, -1(%edx,%ecx) 3065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb %al, (%edx) 3075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_2_4_bytes_forward): 3105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl -2(%eax,%ecx), %ebx 3115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl (%eax), %eax 3125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %bx, -2(%edx,%ecx) 3135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %ax, (%edx) 3145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_5_8_bytes_forward): 3175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl (%eax), %ebx 3185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%eax,%ecx), %eax 3195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %ebx, (%edx) 3205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %eax, -4(%edx,%ecx) 3215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_9_16_bytes_forward): 3245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq (%eax), %xmm0 3255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq -8(%eax, %ecx), %xmm1 3265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq %xmm0, (%edx) 3275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq %xmm1, -8(%edx, %ecx) 3285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 33008d6edf52249e34942d8ed2af6c35b1e2980bc6dChristopher Ferris CFI_POP (%edi) 33108d6edf52249e34942d8ed2af6c35b1e2980bc6dChristopher Ferris CFI_POP (%esi) 33208d6edf52249e34942d8ed2af6c35b1e2980bc6dChristopher Ferris 333fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_recalc_len): 334fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* Compute in %ecx how many bytes are left to copy after 335fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik the main loop stops. */ 336fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %ebx, %ecx 337fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik subl %edx, %ecx 3385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* The code for copying backwards. */ 3395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_or_more_backward): 3405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 341fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128] 3425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik separately. */ 3435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $16, %ecx 3445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jbe L(mm_len_0_16_bytes_backward) 3455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 346fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $32, %ecx 3475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jg L(mm_len_32_or_more_backward) 3485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..32] and return. */ 3505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 3515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm1 3525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 3535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, -16(%edx, %ecx) 3545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_32_or_more_backward): 357fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $64, %ecx 3585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jg L(mm_len_64_or_more_backward) 3595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..64] and return. */ 3615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 3625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 3635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm2 3645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm3 3655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 3665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 3675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, -16(%edx, %ecx) 3685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, -32(%edx, %ecx) 3695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_64_or_more_backward): 372fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $128, %ecx 3735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jg L(mm_len_128_or_more_backward) 3745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..128] and return. */ 3765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 3775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 3785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 32(%eax), %xmm2 3795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 48(%eax), %xmm3 3805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%eax, %ecx), %xmm4 3815a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%eax, %ecx), %xmm5 3825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm6 3835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm7 3845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 3855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 3865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, 32(%edx) 3875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, 48(%edx) 3885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm4, -64(%edx, %ecx) 3895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm5, -48(%edx, %ecx) 3905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm6, -32(%edx, %ecx) 3915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm7, -16(%edx, %ecx) 3925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_128_or_more_backward): 3955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%esi) 3965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%edi) 3975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Aligning the address of destination. We need to save 3995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 16 bits from the source in order not to overwrite them. */ 4005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm0 4015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm1 4025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%eax, %ecx), %xmm2 4035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%eax, %ecx), %xmm3 4045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal (%edx, %ecx), %edi 4065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik andl $-64, %edi 4075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %eax, %esi 4095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik subl %edx, %esi 4105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%edi, %esi), %xmm4 4125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%edi, %esi), %xmm5 4135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%edi, %esi), %xmm6 4145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%edi, %esi), %xmm7 4155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, -16(%edx, %ecx) 4175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, -32(%edx, %ecx) 4185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, -48(%edx, %ecx) 4195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, -64(%edx, %ecx) 4205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm4, -16(%edi) 4215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm5, -32(%edi) 4225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm6, -48(%edi) 4235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm7, -64(%edi) 4245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal -64(%edi), %edi 4255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal 64(%edx), %ebx 4275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik andl $-64, %ebx 4285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %edi, %ebx 430fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jae L(mm_main_loop_backward_end) 4315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 432fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp $SHARED_CACHE_SIZE_HALF, %ecx 433fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jae L(mm_large_page_loop_backward) 4345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 4365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_main_loop_backward): 4375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik prefetcht0 -128(%edi, %esi) 4395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%edi, %esi), %xmm0 4415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%edi, %esi), %xmm1 4425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%edi, %esi), %xmm2 4435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%edi, %esi), %xmm3 4445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm0, -64(%edi) 4455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm1, -48(%edi) 4465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm2, -32(%edi) 4475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm3, -16(%edi) 4485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal -64(%edi), %edi 4495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %edi, %ebx 4505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jb L(mm_main_loop_backward) 451fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_main_loop_backward_end): 4525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%edi) 4535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 454fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jmp L(mm_recalc_len) 4555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..16] and return. */ 4575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_16_bytes_backward): 4585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $24, %cl 4595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jnz L(mm_len_9_16_bytes_backward) 4605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $4, %cl 4615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,5 4625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jnz L(mm_len_5_8_bytes_backward) 4635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testl %ecx, %ecx 4645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,2 4655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return) 4665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $2, %cl 4675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,1 4685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne L(mm_len_3_4_bytes_backward) 4695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl -1(%eax,%ecx), %ebx 4705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl (%eax), %eax 4715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb %bl, -1(%edx,%ecx) 4725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb %al, (%edx) 4735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 4745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_3_4_bytes_backward): 4765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl -2(%eax,%ecx), %ebx 4775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl (%eax), %eax 4785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %bx, -2(%edx,%ecx) 4795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %ax, (%edx) 4805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 4815a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_9_16_bytes_backward): 4835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%esi) 4845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%eax,%ecx), %ebx 4855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -8(%eax,%ecx), %esi 4865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %ebx, -4(%edx,%ecx) 4875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %esi, -8(%edx,%ecx) 4885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik subl $8, %ecx 4895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 4905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_len_0_16_bytes_backward) 4915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_5_8_bytes_backward): 4935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl (%eax), %ebx 4945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%eax,%ecx), %eax 4955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %ebx, (%edx) 4965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %eax, -4(%edx,%ecx) 4975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return): 4995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %edx, %eax 5005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik RETURN 5015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return_pop_all): 503fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %edx, %eax 5045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%edi) 5055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 5065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik RETURN 5075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy forward part. */ 5095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 5115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_forward): 512fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax, %edi), %xmm0 513fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax, %edi), %xmm1 514fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax, %edi), %xmm2 515fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax, %edi), %xmm3 516fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm0, (%edi) 517fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm1, 16(%edi) 518fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm2, 32(%edi) 519fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm3, 48(%edi) 520fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal 64(%edi), %edi 521fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp %edi, %ebx 5225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_large_page_loop_forward) 5235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik sfence 524fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jmp L(mm_copy_remaining_forward) 5255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy backward part. */ 5275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 5285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_backward): 5295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%edi, %esi), %xmm0 5305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%edi, %esi), %xmm1 5315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%edi, %esi), %xmm2 5325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%edi, %esi), %xmm3 5335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm0, -64(%edi) 5345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm1, -48(%edi) 5355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm2, -32(%edi) 5365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm3, -16(%edi) 5375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal -64(%edi), %edi 5385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %edi, %ebx 5395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jb L(mm_large_page_loop_backward) 540fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik sfence 5415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%edi) 5425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 543fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jmp L(mm_recalc_len) 5445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikEND (MEMMOVE) 546