/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))

// Sum of squared byte differences: accumulates (src_a[i] - src_b[i])^2 over
// count bytes, processing 16 bytes per loop iteration.
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
  uint32 sse;
  asm volatile (
    "pxor      %%xmm0,%%xmm0                   \n"  // xmm0 = dword accumulator.
    "pxor      %%xmm5,%%xmm5                   \n"  // xmm5 = zero for unpacking.
    LABELALIGN
  "1:                                          \n"
    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
    "lea       " MEMLEA(0x10, 0) ",%0          \n"
    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
    "lea       " MEMLEA(0x10, 1) ",%1          \n"
    "movdqa    %%xmm1,%%xmm3                   \n"
    "psubusb   %%xmm2,%%xmm1                   \n"  // saturated a - b.
    "psubusb   %%xmm3,%%xmm2                   \n"  // saturated b - a.
    "por       %%xmm2,%%xmm1                   \n"  // |a - b| per byte.
    "movdqa    %%xmm1,%%xmm2                   \n"
    "punpcklbw %%xmm5,%%xmm1                   \n"  // widen low 8 bytes to words.
    "punpckhbw %%xmm5,%%xmm2                   \n"  // widen high 8 bytes to words.
    "pmaddwd   %%xmm1,%%xmm1                   \n"  // square and pair-sum to dwords.
    "pmaddwd   %%xmm2,%%xmm2                   \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "paddd     %%xmm2,%%xmm0                   \n"
    "sub       $0x10,%2                        \n"
    "jg        1b                              \n"

    "pshufd    $0xee,%%xmm0,%%xmm1             \n"  // horizontal add of the
    "paddd     %%xmm1,%%xmm0                   \n"  // four dword partial sums.
    "pshufd    $0x1,%%xmm0,%%xmm1              \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "movd      %%xmm0,%3                       \n"

  : "+r"(src_a),      // %0
    "+r"(src_b),      // %1
    "+r"(count),      // %2
    "=g"(sse)         // %3
  :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
  return sse;
}

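// For reference, the SSE2 kernel above computes the same result as the scalar
// loop sketched below when count is a multiple of 16, which is what the
// 16-bytes-per-iteration assembly expects. This is an illustrative sketch
// only and is kept out of the build; SumSquareError_ScalarSketch is not a
// libyuv symbol.
#if 0
static uint32 SumSquareError_ScalarSketch(const uint8* src_a,
                                          const uint8* src_b, int count) {
  uint32 sse = 0u;
  int i;
  for (i = 0; i < count; ++i) {
    int diff = src_a[i] - src_b[i];  // signed byte difference, -255..255.
    sse += (uint32)(diff * diff);    // accumulate the squared difference.
  }
  return sse;
}
#endif
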
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
static uvec32 kHashMul0 = {
  0x0c3525e1,  // 33 ^ 15
  0xa3476dc1,  // 33 ^ 14
  0x3b4039a1,  // 33 ^ 13
  0x4f5f0981,  // 33 ^ 12
};
static uvec32 kHashMul1 = {
  0x30f35d61,  // 33 ^ 11
  0x855cb541,  // 33 ^ 10
  0x040a9121,  // 33 ^ 9
  0x747c7101,  // 33 ^ 8
};
static uvec32 kHashMul2 = {
  0xec41d4e1,  // 33 ^ 7
  0x4cfa3cc1,  // 33 ^ 6
  0x025528a1,  // 33 ^ 5
  0x00121881,  // 33 ^ 4
};
static uvec32 kHashMul3 = {
  0x00008c61,  // 33 ^ 3
  0x00000441,  // 33 ^ 2
  0x00000021,  // 33 ^ 1
  0x00000001,  // 33 ^ 0
};

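// The tables above hold descending powers of 33 (mod 2^32) so that the SSE4.1
// kernel below can fold 16 bytes per iteration:
//   hash' = hash * 33^16 + sum over i in [0,16) of src[i] * 33^(15 - i),
// which equals the scalar djb2 recurrence hash = hash * 33 + src[i] applied
// 16 times. A minimal scalar sketch of that recurrence follows (illustrative
// only, kept out of the build; HashDjb2_ScalarSketch is not a libyuv symbol).
// The scalar form accepts any count; it matches the SSE4.1 kernel when count
// is a multiple of 16.
#if 0
static uint32 HashDjb2_ScalarSketch(const uint8* src, int count, uint32 seed) {
  uint32 hash = seed;
  int i;
  for (i = 0; i < count; ++i) {
    hash = hash * 33u + src[i];  // djb2: multiply-accumulate per byte, mod 2^32.
  }
  return hash;
}
#endif
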
// djb2 hash (hash = hash * 33 + byte) over count bytes, processing 16 bytes
// per loop iteration with the power-of-33 multiplier tables above.
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
  uint32 hash;
  asm volatile (
    "movd      %2,%%xmm0                       \n"  // xmm0 = running hash (seed).
    "pxor      %%xmm7,%%xmm7                   \n"  // xmm7 = zero for unpacking.
    "movdqa    %4,%%xmm6                       \n"  // xmm6 = 33^16 in lane 0.
    LABELALIGN
  "1:                                          \n"
    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
    "lea       " MEMLEA(0x10, 0) ",%0          \n"
    "pmulld    %%xmm6,%%xmm0                   \n"  // hash *= 33^16.
    "movdqa    %5,%%xmm5                       \n"  // 33^15 .. 33^12.
    "movdqa    %%xmm1,%%xmm2                   \n"
    "punpcklbw %%xmm7,%%xmm2                   \n"
    "movdqa    %%xmm2,%%xmm3                   \n"
    "punpcklwd %%xmm7,%%xmm3                   \n"
    "pmulld    %%xmm5,%%xmm3                   \n"
    "movdqa    %6,%%xmm5                       \n"  // 33^11 .. 33^8.
    "movdqa    %%xmm2,%%xmm4                   \n"
    "punpckhwd %%xmm7,%%xmm4                   \n"
    "pmulld    %%xmm5,%%xmm4                   \n"
    "movdqa    %7,%%xmm5                       \n"  // 33^7 .. 33^4.
    "punpckhbw %%xmm7,%%xmm1                   \n"
    "movdqa    %%xmm1,%%xmm2                   \n"
    "punpcklwd %%xmm7,%%xmm2                   \n"
    "pmulld    %%xmm5,%%xmm2                   \n"
    "movdqa    %8,%%xmm5                       \n"  // 33^3 .. 33^0.
    "punpckhwd %%xmm7,%%xmm1                   \n"
    "pmulld    %%xmm5,%%xmm1                   \n"
    "paddd     %%xmm4,%%xmm3                   \n"  // sum the 16 weighted bytes.
    "paddd     %%xmm2,%%xmm1                   \n"
    "paddd     %%xmm3,%%xmm1                   \n"
    "pshufd    $0xe,%%xmm1,%%xmm2              \n"  // horizontal add to one dword.
    "paddd     %%xmm2,%%xmm1                   \n"
    "pshufd    $0x1,%%xmm1,%%xmm2              \n"
    "paddd     %%xmm2,%%xmm1                   \n"
    "paddd     %%xmm1,%%xmm0                   \n"  // hash += weighted byte sum.
    "sub       $0x10,%1                        \n"
    "jg        1b                              \n"
    "movd      %%xmm0,%3                       \n"
  : "+r"(src),        // %0
    "+r"(count),      // %1
    "+rm"(seed),      // %2
    "=g"(hash)        // %3
  : "m"(kHash16x33),  // %4
    "m"(kHashMul0),   // %5
    "m"(kHashMul1),   // %6
    "m"(kHashMul2),   // %7
    "m"(kHashMul3)    // %8
  : "memory", "cc"
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
  return hash;
}
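
// A caller would typically combine runtime CPU detection with a scalar tail,
// roughly as sketched below. This is an illustrative sketch only, kept out of
// the build: CpuHasSSE2() is a hypothetical stand-in for runtime CPU-feature
// detection (libyuv exposes TestCpuFlag in cpu_id.h for that), and libyuv's
// actual dispatch logic lives elsewhere and may differ in detail.
#if 0
static uint64 SumSquareErrorDispatchSketch(const uint8* a, const uint8* b,
                                           int count) {
  uint64 total = 0;
  int i;
  int vector_count = count & ~15;  // largest multiple of 16 <= count.
  if (CpuHasSSE2() && vector_count > 0) {
    total += SumSquareError_SSE2(a, b, vector_count);  // bulk of the buffer.
  } else {
    vector_count = 0;  // no SSE2: everything goes through the scalar loop.
  }
  // Finish the remaining 0..15 bytes (or the whole buffer) with scalar code.
  for (i = vector_count; i < count; ++i) {
    int diff = a[i] - b[i];
    total += (uint32)(diff * diff);
  }
  return total;
}
#endif
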
#endif  // defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif