/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for 32-bit Visual C x86 and clang-cl.
227bc9febe8749e98a3812a0dc4380ceae75c29450Johann#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) 23ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian__declspec(naked) 25ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianuint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { 26ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian __asm { 27ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov eax, [esp + 4] // src_a 28ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov edx, [esp + 8] // src_b 29ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov ecx, [esp + 12] // count 30ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pxor xmm0, xmm0 31ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pxor xmm5, xmm5 32ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 33ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian wloop: 34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian movdqu xmm1, [eax] 35ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian lea eax, [eax + 16] 36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian movdqu xmm2, [edx] 37ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian lea edx, [edx + 16] 38ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movdqa xmm3, xmm1 // abs trick 39ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian psubusb xmm1, xmm2 40ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian psubusb xmm2, xmm3 41ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian por xmm1, xmm2 42ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movdqa xmm2, xmm1 43ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpcklbw xmm1, xmm5 
44ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpckhbw xmm2, xmm5 45ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pmaddwd xmm1, xmm1 46ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pmaddwd xmm2, xmm2 47ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm0, xmm1 48ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm0, xmm2 49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian sub ecx, 16 50ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian jg wloop 51ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 52ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pshufd xmm1, xmm0, 0xee 53ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm0, xmm1 54ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pshufd xmm1, xmm0, 0x01 55ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm0, xmm1 56ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movd eax, xmm0 57ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ret 58ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 59ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 60ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 61ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Visual C 2012 required for AVX2. 62ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#if _MSC_VER >= 1700 63ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. 
64ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#pragma warning(disable: 4752) 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian__declspec(naked) 66ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianuint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { 67ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian __asm { 68ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov eax, [esp + 4] // src_a 69ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov edx, [esp + 8] // src_b 70ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov ecx, [esp + 12] // count 71ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpxor ymm0, ymm0, ymm0 // sum 72ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpxor ymm5, ymm5, ymm5 // constant 0 for unpck 73ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian sub edx, eax 74ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 75ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian wloop: 76ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vmovdqu ymm1, [eax] 77ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vmovdqu ymm2, [eax + edx] 78ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian lea eax, [eax + 32] 79ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpsubusb ymm3, ymm1, ymm2 // abs difference trick 80ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpsubusb ymm2, ymm2, ymm1 81ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpor ymm1, ymm2, ymm3 82ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. 
83ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpunpckhbw ymm1, ymm1, ymm5 84ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. 85ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpmaddwd ymm1, ymm1, ymm1 86ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpaddd ymm0, ymm0, ymm1 87ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpaddd ymm0, ymm0, ymm2 88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian sub ecx, 32 89ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian jg wloop 90ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 91ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. 92ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpaddd ymm0, ymm0, ymm1 93ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. 94ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpaddd ymm0, ymm0, ymm1 95ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpermq ymm1, ymm0, 0x02 // high + low lane. 
96ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpaddd ymm0, ymm0, ymm1 97ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vmovd eax, xmm0 98ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vzeroupper 99ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ret 100ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 101ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 102ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif // _MSC_VER >= 1700 103ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1047bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 1057bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul0 = { 106ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x0c3525e1, // 33 ^ 15 107ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0xa3476dc1, // 33 ^ 14 108ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x3b4039a1, // 33 ^ 13 109ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x4f5f0981, // 33 ^ 12 110ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}; 1117bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul1 = { 112ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x30f35d61, // 33 ^ 11 113ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x855cb541, // 33 ^ 10 114ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x040a9121, // 33 ^ 9 115ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x747c7101, // 33 ^ 8 116ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}; 1177bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul2 = { 118ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0xec41d4e1, // 33 ^ 7 119ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh 
Venkatasubramanian 0x4cfa3cc1, // 33 ^ 6 120ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x025528a1, // 33 ^ 5 121ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x00121881, // 33 ^ 4 122ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}; 1237bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul3 = { 124ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x00008c61, // 33 ^ 3 125ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x00000441, // 33 ^ 2 126ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x00000021, // 33 ^ 1 127ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 0x00000001, // 33 ^ 0 128ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}; 129ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian__declspec(naked) 131ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianuint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { 132ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian __asm { 133ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov eax, [esp + 4] // src 134ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov ecx, [esp + 8] // count 135ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movd xmm0, [esp + 12] // seed 136ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 137ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pxor xmm7, xmm7 // constant 0 for unpck 1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann movdqa xmm6, xmmword ptr kHash16x33 139ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 140ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian wloop: 141ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movdqu xmm1, [eax] // src[0-15] 
142ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian lea eax, [eax + 16] 1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann pmulld xmm0, xmm6 // hash *= 33 ^ 16 1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann movdqa xmm5, xmmword ptr kHashMul0 145ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movdqa xmm2, xmm1 146ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpcklbw xmm2, xmm7 // src[0-7] 147ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movdqa xmm3, xmm2 148ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpcklwd xmm3, xmm7 // src[0-3] 1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann pmulld xmm3, xmm5 1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann movdqa xmm5, xmmword ptr kHashMul1 151ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movdqa xmm4, xmm2 152ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpckhwd xmm4, xmm7 // src[4-7] 1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann pmulld xmm4, xmm5 1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann movdqa xmm5, xmmword ptr kHashMul2 155ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpckhbw xmm1, xmm7 // src[8-15] 156ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movdqa xmm2, xmm1 157ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpcklwd xmm2, xmm7 // src[8-11] 1587bc9febe8749e98a3812a0dc4380ceae75c29450Johann pmulld xmm2, xmm5 1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann movdqa xmm5, xmmword ptr kHashMul3 160ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian punpckhwd xmm1, xmm7 // src[12-15] 1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann pmulld xmm1, xmm5 162ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm3, xmm4 // add 16 results 163ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm1, xmm2 
164ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm1, xmm3 165ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 166ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pshufd xmm2, xmm1, 0x0e // upper 2 dwords 167ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm1, xmm2 168ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pshufd xmm2, xmm1, 0x01 169ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm1, xmm2 170ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian paddd xmm0, xmm1 171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian sub ecx, 16 172ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian jg wloop 173ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 174ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian movd eax, xmm0 // return hash 175ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ret 176ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 177ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 178ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 179ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Visual C 2012 required for AVX2. 
180ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#if _MSC_VER >= 1700 181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian__declspec(naked) 182ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianuint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { 183ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian __asm { 184ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov eax, [esp + 4] // src 185ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mov ecx, [esp + 8] // count 1867bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmovd xmm0, [esp + 12] // seed 187ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 188ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian wloop: 1897bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmovzxbd xmm3, [eax] // src[0-3] 1907bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16 1917bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmovzxbd xmm4, [eax + 4] // src[4-7] 1927bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmulld xmm3, xmm3, xmmword ptr kHashMul0 1937bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmovzxbd xmm2, [eax + 8] // src[8-11] 1947bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmulld xmm4, xmm4, xmmword ptr kHashMul1 1957bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmovzxbd xmm1, [eax + 12] // src[12-15] 1967bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmulld xmm2, xmm2, xmmword ptr kHashMul2 197ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian lea eax, [eax + 16] 1987bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpmulld xmm1, xmm1, xmmword ptr kHashMul3 1997bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpaddd xmm3, xmm3, xmm4 // add 16 results 2007bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpaddd xmm1, xmm1, xmm2 2017bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpaddd xmm1, xmm1, xmm3 
2027bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpshufd xmm2, xmm1, 0x0e // upper 2 dwords 2037bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpaddd xmm1, xmm1,xmm2 2047bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpshufd xmm2, xmm1, 0x01 2057bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpaddd xmm1, xmm1, xmm2 2067bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpaddd xmm0, xmm0, xmm1 207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian sub ecx, 16 208ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian jg wloop 209ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2107bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmovd eax, xmm0 // return hash 2117bc9febe8749e98a3812a0dc4380ceae75c29450Johann vzeroupper 212ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ret 213ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 214ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 215ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif // _MSC_VER >= 1700 2167bc9febe8749e98a3812a0dc4380ceae75c29450Johann 217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) 218ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 219ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#ifdef __cplusplus 220ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} // extern "C" 221ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} // namespace libyuv 222ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif 223