/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "cache.h"

/* Symbol name under which the routine is emitted.  The guard lets a
   definition of MEMMOVE made before this point take precedence.  */
#ifndef MEMMOVE
# define MEMMOVE	memmove
#endif

/* L(label) expands to .Llabel; every branch target in this file is
   written through it.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin spellings of the assembler's .cfi_* directives.  Each one is
   guarded, so an earlier definition of the same name is left alone.  */

/* Expands to the .cfi_startproc directive.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

/* Expands to the .cfi_endproc directive.  */
#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* Expands to `.cfi_rel_offset reg, off'.  */
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

/* Expands to `.cfi_restore reg'.  */
#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

/* Expands to `.cfi_adjust_cfa_offset off'.  */
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): mark NAME as a global symbol of type @function, apply
   `.p2align 4', place the label NAME and open its CFI region with
   cfi_startproc.  */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/* END(name): close the CFI region with cfi_endproc and set the size of
   NAME to the distance from its label to this point (`.-name').  */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/* Displacements of the three arguments, used as DEST(%esp), SRC(%esp)
   and LEN(%esp).  Each is 4 above the previous one; the base value PARMS
   is defined further down in this file.  */
#define DEST		PARMS
#define SRC		DEST+4
#define LEN		SRC+4

/* CFI bookkeeping that accompanies a 4-byte push of REG: the CFA offset
   is adjusted by 4 and REG is recorded at relative offset 0.  */
#define CFI_PUSH(REG)		\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)
835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define CFI_POP(REG) \ 855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_adjust_cfa_offset (-4); \ 865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cfi_restore (REG) 875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define PUSH(REG) pushl REG; CFI_PUSH (REG) 895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define POP(REG) popl REG; CFI_POP (REG) 905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define PARMS 8 /* Preserve EBX. */ 925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define ENTRANCE PUSH (%ebx); 935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define RETURN_END POP (%ebx); ret 945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik#define RETURN RETURN_END; CFI_PUSH (%ebx) 955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .section .text.sse2,"ax",@progbits 975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikENTRY (MEMMOVE) 985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ENTRANCE 995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl LEN(%esp), %ecx 1005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl SRC(%esp), %eax 1015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl DEST(%esp), %edx 1025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Check whether we should copy backward or forward. 
*/ 1045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %eax, %edx 1055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return) 106fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jg L(mm_len_0_or_more_backward) 1075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128] 1095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik separately. */ 1105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $16, %ecx 1115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jbe L(mm_len_0_16_bytes_forward) 1125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 113fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $32, %ecx 114fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik ja L(mm_len_32_or_more_forward) 1155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..32] and return. */ 1175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 1185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm1 1195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 1205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, -16(%edx, %ecx) 1215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 1225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_32_or_more_forward): 124fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $64, %ecx 125fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik ja L(mm_len_64_or_more_forward) 1265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..64] and return. 
*/ 1285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 1295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 1305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm2 1315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm3 1325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 1335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 1345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, -16(%edx, %ecx) 1355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, -32(%edx, %ecx) 1365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 1375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_64_or_more_forward): 139fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $128, %ecx 140fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik ja L(mm_len_128_or_more_forward) 1415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..128] and return. 
*/ 1435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 1445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 1455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 32(%eax), %xmm2 1465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 48(%eax), %xmm3 1475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%eax, %ecx), %xmm4 1485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%eax, %ecx), %xmm5 1495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm6 1505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm7 1515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 1525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 1535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, 32(%edx) 1545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, 48(%edx) 1555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm4, -64(%edx, %ecx) 1565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm5, -48(%edx, %ecx) 1575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm6, -32(%edx, %ecx) 1585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm7, -16(%edx, %ecx) 1595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 1605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_128_or_more_forward): 1625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%esi) 1635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%edi) 1645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 1655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Aligning the address of destination. 
*/ 166fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax), %xmm0 167fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax), %xmm1 168fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax), %xmm2 169fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax), %xmm3 1705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 171fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal 64(%edx), %edi 172fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik andl $-64, %edi 173fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik subl %edx, %eax 1745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 175fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax, %edi), %xmm4 176fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax, %edi), %xmm5 177fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax, %edi), %xmm6 178fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax, %edi), %xmm7 1795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 180fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm0, (%edx) 181fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm1, 16(%edx) 182fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm2, 32(%edx) 183fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm3, 48(%edx) 184fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqa %xmm4, (%edi) 185fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm5, 16(%edi) 186fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm6, 32(%edi) 187fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm7, 48(%edi) 188fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik addl $64, %edi 1895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 190fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal (%edx, %ecx), %ebx 
1915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik andl $-64, %ebx 192fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp %edi, %ebx 1935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jbe L(mm_copy_remaining_forward) 1945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 195fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp $SHARED_CACHE_SIZE_HALF, %ecx 196fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jae L(mm_large_page_loop_forward) 197fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik 1985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 1995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_main_loop_forward): 2005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 201fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik prefetcht0 128(%eax, %edi) 202fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik 203fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax, %edi), %xmm0 204fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax, %edi), %xmm1 205fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax, %edi), %xmm2 206fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax, %edi), %xmm3 207fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqa %xmm0, (%edi) 208fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm1, 16(%edi) 209fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm2, 32(%edi) 210fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movaps %xmm3, 48(%edi) 211fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal 64(%edi), %edi 212fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp %edi, %ebx 2135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_main_loop_forward) 2145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_copy_remaining_forward): 
216fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik addl %edx, %ecx 217fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik subl %edi, %ecx 218fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* We copied all up till %edi position in the dst. 2195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik In %ecx now is how many bytes are left to copy. 2205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik Now we need to advance %esi. */ 221fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal (%edi, %eax), %esi 2225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_0_64_bytes_forward): 2245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $32, %ecx 2255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_33_64_bytes_forward) 2265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $16, %ecx 2275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_17_32_bytes_forward) 2285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testl %ecx, %ecx 2295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,2 2305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return_pop_all) 2315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmpb $8, %cl 2335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_9_16_bytes_forward) 2345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmpb $4, %cl 2355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,5 2365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_remaining_5_8_bytes_forward) 2375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmpb $2, %cl 2385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,1 2395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja 
L(mm_remaining_3_4_bytes_forward) 2405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl -1(%esi,%ecx), %eax 2415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl (%esi), %ebx 242fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movb %al, -1(%edi,%ecx) 243fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movb %bl, (%edi) 2445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_33_64_bytes_forward): 2475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%esi), %xmm0 2485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%esi), %xmm1 2495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%esi, %ecx), %xmm2 2505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%esi, %ecx), %xmm3 251fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm0, (%edi) 252fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm1, 16(%edi) 253fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm2, -32(%edi, %ecx) 254fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm3, -16(%edi, %ecx) 2555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_17_32_bytes_forward): 2585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%esi), %xmm0 2595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%esi, %ecx), %xmm1 260fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm0, (%edi) 261fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu %xmm1, -16(%edi, %ecx) 2625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 
2635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 264fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_remaining_9_16_bytes_forward): 265fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq (%esi), %xmm0 266fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq -8(%esi, %ecx), %xmm1 267fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq %xmm0, (%edi) 268fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movq %xmm1, -8(%edi, %ecx) 2695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_remaining_5_8_bytes_forward): 2725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl (%esi), %eax 2735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%esi,%ecx), %ebx 274fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %eax, (%edi) 275fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %ebx, -4(%edi,%ecx) 2765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 278fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_remaining_3_4_bytes_forward): 279fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movzwl -2(%esi,%ecx), %eax 280fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movzwl (%esi), %ebx 281fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movw %ax, -2(%edi,%ecx) 282fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movw %bx, (%edi) 2835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return_pop_all) 2845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 2855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_16_bytes_forward): 2865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $24, %cl 2875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne 
L(mm_len_9_16_bytes_forward) 2885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $4, %cl 2895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,5 2905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne L(mm_len_5_8_bytes_forward) 2915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testl %ecx, %ecx 2925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,2 2935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return) 2945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $2, %cl 2955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,1 2965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne L(mm_len_2_4_bytes_forward) 2975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl -1(%eax,%ecx), %ebx 2985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl (%eax), %eax 2995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb %bl, -1(%edx,%ecx) 3005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb %al, (%edx) 3015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_2_4_bytes_forward): 3045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl -2(%eax,%ecx), %ebx 3055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl (%eax), %eax 3065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %bx, -2(%edx,%ecx) 3075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %ax, (%edx) 3085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_5_8_bytes_forward): 3115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl (%eax), %ebx 3125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%eax,%ecx), 
%eax 3135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %ebx, (%edx) 3145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %eax, -4(%edx,%ecx) 3155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_9_16_bytes_forward): 3185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq (%eax), %xmm0 3195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq -8(%eax, %ecx), %xmm1 3205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq %xmm0, (%edx) 3215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movq %xmm1, -8(%edx, %ecx) 3225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 32497b6e131fd85bb0ae321d9e5a8a613f5ee373cf4Christopher Ferris CFI_POP (%edi) 32597b6e131fd85bb0ae321d9e5a8a613f5ee373cf4Christopher Ferris CFI_POP (%esi) 32697b6e131fd85bb0ae321d9e5a8a613f5ee373cf4Christopher Ferris 327fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_recalc_len): 328fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* Compute in %ecx how many bytes are left to copy after 329fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik the main loop stops. */ 330fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %ebx, %ecx 331fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik subl %edx, %ecx 3325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* The code for copying backwards. */ 3335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_or_more_backward): 3345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 335fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128] 3365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik separately. 
*/ 3375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp $16, %ecx 3385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jbe L(mm_len_0_16_bytes_backward) 3395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 340fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $32, %ecx 3415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jg L(mm_len_32_or_more_backward) 3425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..32] and return. */ 3445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 3455a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm1 3465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 3475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, -16(%edx, %ecx) 3485a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_32_or_more_backward): 351fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $64, %ecx 3525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jg L(mm_len_64_or_more_backward) 3535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..64] and return. 
*/ 3555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 3565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 3575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm2 3585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm3 3595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 3605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 3615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, -16(%edx, %ecx) 3625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, -32(%edx, %ecx) 3635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_64_or_more_backward): 366fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmpl $128, %ecx 3675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jg L(mm_len_128_or_more_backward) 3685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..128] and return. 
*/ 3705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu (%eax), %xmm0 3715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 16(%eax), %xmm1 3725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 32(%eax), %xmm2 3735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu 48(%eax), %xmm3 3745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%eax, %ecx), %xmm4 3755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%eax, %ecx), %xmm5 3765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm6 3775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm7 3785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, (%edx) 3795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, 16(%edx) 3805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, 32(%edx) 3815a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, 48(%edx) 3825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm4, -64(%edx, %ecx) 3835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm5, -48(%edx, %ecx) 3845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm6, -32(%edx, %ecx) 3855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm7, -16(%edx, %ecx) 3865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 3875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_128_or_more_backward): 3895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%esi) 3905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%edi) 3915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Aligning the address of destination. 
We need to save 3935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik the last 64 bytes of the source in order not to overwrite them. */ 3945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%eax, %ecx), %xmm0 3955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%eax, %ecx), %xmm1 3965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%eax, %ecx), %xmm2 3975a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%eax, %ecx), %xmm3 3985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 3995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal (%edx, %ecx), %edi 4005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik andl $-64, %edi 4015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %eax, %esi 4035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik subl %edx, %esi 4045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%edi, %esi), %xmm4 4065a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%edi, %esi), %xmm5 4075a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%edi, %esi), %xmm6 4085a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%edi, %esi), %xmm7 4095a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4105a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm0, -16(%edx, %ecx) 4115a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm1, -32(%edx, %ecx) 4125a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm2, -48(%edx, %ecx) 4135a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu %xmm3, -64(%edx, %ecx) 4145a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm4, -16(%edi) 4155a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm5, -32(%edi) 4165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 
movdqa %xmm6, -48(%edi) 4175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm7, -64(%edi) 4185a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal -64(%edi), %edi 4195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal 64(%edx), %ebx 4215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik andl $-64, %ebx 4225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %edi, %ebx 424fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jae L(mm_main_loop_backward_end) 4255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 426fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp $SHARED_CACHE_SIZE_HALF, %ecx 427fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jae L(mm_large_page_loop_backward) 4285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 4305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_main_loop_backward): 4315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik prefetcht0 -128(%edi, %esi) 4335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4345a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%edi, %esi), %xmm0 4355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%edi, %esi), %xmm1 4365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%edi, %esi), %xmm2 4375a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%edi, %esi), %xmm3 4385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm0, -64(%edi) 4395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm1, -48(%edi) 4405a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm2, -32(%edi) 4415a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqa %xmm3, 
-16(%edi) 4425a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal -64(%edi), %edi 4435a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %edi, %ebx 4445a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jb L(mm_main_loop_backward) 445fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara RainchikL(mm_main_loop_backward_end): 4465a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%edi) 4475a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 448fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jmp L(mm_recalc_len) 4495a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4505a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Copy [0..16] and return. */ 4515a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_0_16_bytes_backward): 4525a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $24, %cl 4535a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jnz L(mm_len_9_16_bytes_backward) 4545a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $4, %cl 4555a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,5 4565a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jnz L(mm_len_5_8_bytes_backward) 4575a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testl %ecx, %ecx 4585a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,2 4595a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik je L(mm_return) 4605a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik testb $2, %cl 4615a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4,,1 4625a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jne L(mm_len_3_4_bytes_backward) 4635a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl -1(%eax,%ecx), %ebx 4645a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzbl (%eax), %eax 4655a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb %bl, -1(%edx,%ecx) 4665a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movb 
%al, (%edx) 4675a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 4685a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4695a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_3_4_bytes_backward): 4705a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl -2(%eax,%ecx), %ebx 4715a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movzwl (%eax), %eax 4725a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %bx, -2(%edx,%ecx) 4735a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movw %ax, (%edx) 4745a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_return) 4755a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4765a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_9_16_bytes_backward): 4775a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik PUSH (%esi) 4785a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%eax,%ecx), %ebx 4795a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -8(%eax,%ecx), %esi 4805a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %ebx, -4(%edx,%ecx) 4815a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %esi, -8(%edx,%ecx) 4825a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik subl $8, %ecx 4835a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 4845a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jmp L(mm_len_0_16_bytes_backward) 4855a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4865a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_len_5_8_bytes_backward): 4875a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl (%eax), %ebx 4885a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl -4(%eax,%ecx), %eax 4895a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %ebx, (%edx) 4905a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %eax, -4(%edx,%ecx) 4915a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 
/* Exit paths and non-temporal ("large page") copy loops of MEMMOVE.

   L(mm_return): copies %edx into %eax and leaves through the RETURN
   macro (defined outside this view).  %edx is the base register used by
   the destination stores in the copy paths above, so %eax ends up
   holding the destination pointer.

   L(mm_return_pop_all): same as L(mm_return), but first restores %edi
   and then %esi with the POP macro, the reverse of the PUSH (%esi) /
   PUSH (%edi) order used in L(mm_len_128_or_more_backward).

   L(mm_large_page_loop_forward): per iteration, loads 64 bytes with four
   unaligned movdqu from (%eax, %edi), writes them to (%edi) with four
   movntdq stores, and advances %edi by 64.  Repeats while %ebx is above
   %edi (unsigned), then issues sfence and jumps to
   L(mm_copy_remaining_forward) (outside this view).
   NOTE(review): %eax presumably holds (source - destination) here and
   %edi a 16-byte-aligned destination cursor; both are set up outside
   this view -- confirm.

   L(mm_large_page_loop_backward): mirror image walking downwards.  %esi
   is (source - destination) and %edi is the 64-byte-aligned destination
   cursor, both computed in L(mm_len_128_or_more_backward); that section
   branches here when %ecx is at or above SHARED_CACHE_SIZE_HALF.  Each
   iteration copies the 64 bytes just below %edi and subtracts 64 from
   %edi, repeating while %ebx is below %edi (unsigned).  Then it issues
   sfence, restores %edi and %esi, and jumps to L(mm_recalc_len)
   (outside this view).  */
4925a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return): 4935a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movl %edx, %eax 4945a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik RETURN 4955a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 4965a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_return_pop_all): 497fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movl %edx, %eax 4985a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%edi) 4995a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 5005a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik RETURN 5015a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5025a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy forward part: 64 bytes per iteration using non-temporal (movntdq) stores. */ 5035a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5045a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 5055a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_forward): 506fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu (%eax, %edi), %xmm0 507fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 16(%eax, %edi), %xmm1 508fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 32(%eax, %edi), %xmm2 509fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movdqu 48(%eax, %edi), %xmm3 510fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm0, (%edi) 511fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm1, 16(%edi) 512fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm2, 32(%edi) 513fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik movntdq %xmm3, 48(%edi) 514fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik leal 64(%edi), %edi 515fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik cmp %edi, %ebx 5165a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik ja L(mm_large_page_loop_forward) 
5175a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik sfence 518fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jmp L(mm_copy_remaining_forward) 5195a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5205a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik/* Big length copy backward part: 64 bytes per iteration, walking down, using non-temporal (movntdq) stores. */ 5215a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik .p2align 4 5225a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikL(mm_large_page_loop_backward): 5235a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -64(%edi, %esi), %xmm0 5245a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -48(%edi, %esi), %xmm1 5255a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -32(%edi, %esi), %xmm2 5265a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movdqu -16(%edi, %esi), %xmm3 5275a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm0, -64(%edi) 5285a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm1, -48(%edi) 5295a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm2, -32(%edi) 5305a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik movntdq %xmm3, -16(%edi) 5315a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik leal -64(%edi), %edi 5325a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik cmp %edi, %ebx 5335a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik jb L(mm_large_page_loop_backward) 534fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik sfence 5355a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%edi) 5365a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik POP (%esi) 537fce861498c8c4720c6ad2475a73bb4c3e55d6948Varvara Rainchik jmp L(mm_recalc_len) 5385a92284167ffba6d45210ef6889fa7d255c15d4fVarvara Rainchik 5395a92284167ffba6d45210ef6889fa7d255c15d4fVarvara RainchikEND (MEMMOVE) 540