/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
11ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include "libyuv/basic_types.h"
127bc9febe8749e98a3812a0dc4380ceae75c29450Johann
137bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include "libyuv/compare_row.h"
14ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include "libyuv/row.h"
15ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
16ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#ifdef __cplusplus
17ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramaniannamespace libyuv {
18ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianextern "C" {
19ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif
20ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
// This module is for 32-bit Visual C++ (x86) and clang-cl.
227bc9febe8749e98a3812a0dc4380ceae75c29450Johann#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
23ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian__declspec(naked)
25ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianuint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
26ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  __asm {
27ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    mov        eax, [esp + 4]    // src_a
28ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    mov        edx, [esp + 8]    // src_b
29ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    mov        ecx, [esp + 12]   // count
30ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pxor       xmm0, xmm0
31ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pxor       xmm5, xmm5
32ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
33ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  wloop:
34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    movdqu     xmm1, [eax]
35ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    lea        eax,  [eax + 16]
36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    movdqu     xmm2, [edx]
37ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    lea        edx,  [edx + 16]
38ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movdqa     xmm3, xmm1  // abs trick
39ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    psubusb    xmm1, xmm2
40ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    psubusb    xmm2, xmm3
41ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    por        xmm1, xmm2
42ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movdqa     xmm2, xmm1
43ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpcklbw  xmm1, xmm5
44ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpckhbw  xmm2, xmm5
45ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pmaddwd    xmm1, xmm1
46ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pmaddwd    xmm2, xmm2
47ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm0, xmm1
48ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm0, xmm2
49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    sub        ecx, 16
50ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    jg         wloop
51ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
52ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pshufd     xmm1, xmm0, 0xee
53ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm0, xmm1
54ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pshufd     xmm1, xmm0, 0x01
55ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm0, xmm1
56ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movd       eax, xmm0
57ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    ret
58ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  }
59ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}
60ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752)
// Returns the sum of squared differences between the bytes of src_a and
// src_b, accumulated in 32 bits (AVX2 implementation).
//
//   src_a, src_b  Input byte buffers. Loaded with vmovdqu, so no alignment
//                 is required.
//   count         Number of bytes to compare. 32 bytes are consumed per
//                 iteration and the loop is tested at the bottom, so one
//                 full 32-byte block is always read, even if count <= 0.
//                 NOTE(review): callers presumably pass a positive multiple
//                 of 32 - confirm against the call sites.
//
// The function is naked: arguments are read directly from the stack relative
// to esp and the result is returned in eax.
// Registers written: eax, ecx, edx, ymm0-ymm3, ymm5.
__declspec(naked)
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
  __asm {
    mov        eax, [esp + 4]    // src_a
    mov        edx, [esp + 8]    // src_b
    mov        ecx, [esp + 12]   // count
    vpxor      ymm0, ymm0, ymm0  // sum
    vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
    sub        edx, eax          // edx = src_b - src_a; only eax advances

  wloop:
    vmovdqu    ymm1, [eax]       // 32 bytes of src_a
    vmovdqu    ymm2, [eax + edx] // matching 32 bytes of src_b
    lea        eax,  [eax + 32]
    vpsubusb   ymm3, ymm1, ymm2  // abs difference trick: saturate(a - b)
    vpsubusb   ymm2, ymm2, ymm1  // saturate(b - a)
    vpor       ymm1, ymm2, ymm3  // one side is always 0, so OR gives |a - b|
    vpunpcklbw ymm2, ymm1, ymm5  // u16.  mutates order (unpack is per
                                 // 128-bit lane); harmless for a sum.
    vpunpckhbw ymm1, ymm1, ymm5
    vpmaddwd   ymm2, ymm2, ymm2  // square + hadd to u32.
    vpmaddwd   ymm1, ymm1, ymm1
    vpaddd     ymm0, ymm0, ymm1
    vpaddd     ymm0, ymm0, ymm2
    sub        ecx, 32
    jg         wloop             // continue while bytes remain

    vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 both lanes.
    vpaddd     ymm0, ymm0, ymm1
    vpshufd    ymm1, ymm0, 0x01  // 1 + 0 both lanes.
    vpaddd     ymm0, ymm0, ymm1
    vpermq     ymm1, ymm0, 0x02  // high + low lane.
    vpaddd     ymm0, ymm0, ymm1
    vmovd      eax, xmm0         // return sum
    vzeroupper                   // clear upper YMM halves before returning
    ret
  }
}
#endif  // _MSC_VER >= 1700
103ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
1047bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
1057bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul0 = {
106ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x0c3525e1,  // 33 ^ 15
107ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0xa3476dc1,  // 33 ^ 14
108ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x3b4039a1,  // 33 ^ 13
109ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x4f5f0981,  // 33 ^ 12
110ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian};
1117bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul1 = {
112ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x30f35d61,  // 33 ^ 11
113ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x855cb541,  // 33 ^ 10
114ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x040a9121,  // 33 ^ 9
115ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x747c7101,  // 33 ^ 8
116ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian};
1177bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul2 = {
118ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0xec41d4e1,  // 33 ^ 7
119ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x4cfa3cc1,  // 33 ^ 6
120ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x025528a1,  // 33 ^ 5
121ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x00121881,  // 33 ^ 4
122ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian};
1237bc9febe8749e98a3812a0dc4380ceae75c29450Johannuvec32 kHashMul3 = {
124ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x00008c61,  // 33 ^ 3
125ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x00000441,  // 33 ^ 2
126ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x00000021,  // 33 ^ 1
127ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  0x00000001,  // 33 ^ 0
128ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian};
129ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian__declspec(naked)
131ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianuint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
132ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  __asm {
133ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    mov        eax, [esp + 4]    // src
134ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    mov        ecx, [esp + 8]    // count
135ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movd       xmm0, [esp + 12]  // seed
136ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
137ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pxor       xmm7, xmm7        // constant 0 for unpck
1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    movdqa     xmm6, xmmword ptr kHash16x33
139ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
140ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  wloop:
141ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movdqu     xmm1, [eax]       // src[0-15]
142ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    lea        eax, [eax + 16]
1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann    pmulld     xmm0, xmm6        // hash *= 33 ^ 16
1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann    movdqa     xmm5, xmmword ptr kHashMul0
145ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movdqa     xmm2, xmm1
146ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpcklbw  xmm2, xmm7        // src[0-7]
147ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movdqa     xmm3, xmm2
148ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpcklwd  xmm3, xmm7        // src[0-3]
1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    pmulld     xmm3, xmm5
1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    movdqa     xmm5, xmmword ptr kHashMul1
151ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movdqa     xmm4, xmm2
152ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpckhwd  xmm4, xmm7        // src[4-7]
1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann    pmulld     xmm4, xmm5
1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann    movdqa     xmm5, xmmword ptr kHashMul2
155ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpckhbw  xmm1, xmm7        // src[8-15]
156ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movdqa     xmm2, xmm1
157ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpcklwd  xmm2, xmm7        // src[8-11]
1587bc9febe8749e98a3812a0dc4380ceae75c29450Johann    pmulld     xmm2, xmm5
1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    movdqa     xmm5, xmmword ptr kHashMul3
160ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    punpckhwd  xmm1, xmm7        // src[12-15]
1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    pmulld     xmm1, xmm5
162ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm3, xmm4        // add 16 results
163ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm1, xmm2
164ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm1, xmm3
165ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
166ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
167ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm1, xmm2
168ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    pshufd     xmm2, xmm1, 0x01
169ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm1, xmm2
170ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    paddd      xmm0, xmm1
171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    sub        ecx, 16
172ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    jg         wloop
173ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
174ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    movd       eax, xmm0         // return hash
175ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian    ret
176ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  }
177ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}
178ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// Computes a djb2-style hash of src, starting from seed, using VEX-encoded
// instructions on xmm registers.
//
// Each iteration folds 16 bytes into the hash as
//   hash = hash * 33^16 + src[0] * 33^15 + src[1] * 33^14 + ... + src[15]
// in 32-bit arithmetic, which equals applying hash = hash * 33 + src[i] to
// each of the 16 bytes in order.
//
//   src    Input bytes, read four at a time with vpmovzxbd.
//   count  Number of bytes to hash. 16 bytes are consumed per iteration and
//          the loop is tested at the bottom, so one full 16-byte block is
//          always read, even if count <= 0.
//          NOTE(review): callers presumably pass a positive multiple of 16 -
//          confirm against the call sites.
//   seed   Initial hash value.
//
// The function is naked: arguments are read directly from the stack relative
// to esp and the result is returned in eax.
// Registers written: eax, ecx, xmm0-xmm4.
__declspec(naked)
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
  __asm {
    mov        eax, [esp + 4]    // src
    mov        ecx, [esp + 8]    // count
    vmovd      xmm0, [esp + 12]  // seed

  wloop:
    vpmovzxbd  xmm3, [eax]       // src[0-3], zero-extended to 4 x u32
    vpmulld    xmm0, xmm0, xmmword ptr kHash16x33  // hash *= 33 ^ 16
    vpmovzxbd  xmm4, [eax + 4]   // src[4-7]
    vpmulld    xmm3, xmm3, xmmword ptr kHashMul0
    vpmovzxbd  xmm2, [eax + 8]   // src[8-11]
    vpmulld    xmm4, xmm4, xmmword ptr kHashMul1
    vpmovzxbd  xmm1, [eax + 12]  // src[12-15]
    vpmulld    xmm2, xmm2, xmmword ptr kHashMul2
    lea        eax, [eax + 16]
    vpmulld    xmm1, xmm1, xmmword ptr kHashMul3
    vpaddd     xmm3, xmm3, xmm4  // add 16 results
    vpaddd     xmm1, xmm1, xmm2
    vpaddd     xmm1, xmm1, xmm3
    vpshufd    xmm2, xmm1, 0x0e  // upper 2 dwords
    vpaddd     xmm1, xmm1, xmm2
    vpshufd    xmm2, xmm1, 0x01  // dword 1 down to dword 0
    vpaddd     xmm1, xmm1, xmm2  // dword0 = sum of all 16 weighted bytes
    vpaddd     xmm0, xmm0, xmm1  // only lane 0 matters; the other lanes are
                                 // zeroed by the next multiply by kHash16x33
    sub        ecx, 16
    jg         wloop             // continue while bytes remain

    vmovd      eax, xmm0         // return hash
    vzeroupper                   // clear upper YMM halves before returning
    ret
  }
}
#endif  // _MSC_VER >= 1700
2167bc9febe8749e98a3812a0dc4380ceae75c29450Johann
217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
218ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian
219ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#ifdef __cplusplus
220ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}  // extern "C"
221ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}  // namespace libyuv
222ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif
223