1/*
2 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "./vpx_config.h"
12#include "./vpx_dsp_rtcd.h"
13#include "vpx_dsp/x86/convolve.h"
14
15#if HAVE_SSE2
// Declarations of the "v8"/"h8" 1-D filter kernels carrying the _sse2 suffix.
// Nothing in this file defines them, so the definitions are presumably
// supplied by separately built SSE2 code -- TODO confirm where.
// filter8_1dfunction is not defined in this file either; presumably it is a
// function typedef from vpx_dsp/x86/convolve.h (included above).
// Reading of the names (inferred, verify against the kernels):
//   block1d16 / block1d8 / block1d4 -> block width handled by the kernel
//   v8 / h8                         -> vertical / horizontal 8-tap filter
//   _avg                            -> variant that averages with dst
16filter8_1dfunction vpx_filter_block1d16_v8_sse2;
17filter8_1dfunction vpx_filter_block1d16_h8_sse2;
18filter8_1dfunction vpx_filter_block1d8_v8_sse2;
19filter8_1dfunction vpx_filter_block1d8_h8_sse2;
20filter8_1dfunction vpx_filter_block1d4_v8_sse2;
21filter8_1dfunction vpx_filter_block1d4_h8_sse2;
22filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2;
23filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2;
24filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2;
25filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2;
26filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2;
27filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2;
28
// Declarations of the "v2"/"h2" 1-D filter kernels carrying the _sse2 suffix.
// They use the same filter8_1dfunction type as the v8/h8 kernels, so both
// families share one signature. The set mirrors the v8/h8 list: widths
// 16/8/4, v and h directions, each with and without _avg.
// "2" presumably denotes a 2-tap (bilinear) filter -- TODO confirm against
// the kernel implementations, which are not part of this file.
29filter8_1dfunction vpx_filter_block1d16_v2_sse2;
30filter8_1dfunction vpx_filter_block1d16_h2_sse2;
31filter8_1dfunction vpx_filter_block1d8_v2_sse2;
32filter8_1dfunction vpx_filter_block1d8_h2_sse2;
33filter8_1dfunction vpx_filter_block1d4_v2_sse2;
34filter8_1dfunction vpx_filter_block1d4_h2_sse2;
35filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2;
36filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2;
37filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2;
38filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2;
39filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2;
40filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2;
41
42// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
43//                               uint8_t *dst, ptrdiff_t dst_stride,
44//                               const int16_t *filter_x, int x_step_q4,
45//                               const int16_t *filter_y, int y_step_q4,
46//                               int w, int h);
47// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
48//                              uint8_t *dst, ptrdiff_t dst_stride,
49//                              const int16_t *filter_x, int x_step_q4,
50//                              const int16_t *filter_y, int y_step_q4,
51//                              int w, int h);
52// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
53//                                   uint8_t *dst, ptrdiff_t dst_stride,
54//                                   const int16_t *filter_x, int x_step_q4,
55//                                   const int16_t *filter_y, int y_step_q4,
56//                                   int w, int h);
57// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
58//                                  uint8_t *dst, ptrdiff_t dst_stride,
59//                                  const int16_t *filter_x, int x_step_q4,
60//                                  const int16_t *filter_y, int y_step_q4,
61//                                  int w, int h);
// Instantiate the four 1-D entry points prototyped in the preceding comment
// (vpx_convolve8_{horiz,vert,avg_horiz,avg_vert}_sse2). FUN_CONV_1D is not
// defined in this file (presumably in vpx_dsp/x86/convolve.h), so the exact
// expansion, and how it selects among the kernels declared above, must be
// checked there.
//
// Horizontal: passes the x step and x filter, the "h" selector, and starts
// at src. The sixth argument is intentionally left empty (non-avg variant).
62FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
// Vertical: passes the y step and y filter, the "v" selector, and starts
// three rows above src (src - src_stride * 3). Presumably these are the
// lead-in rows an 8-tap vertical filter reads -- TODO confirm.
63FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
// Averaging counterparts: identical arguments except the sixth is "avg_",
// which by name corresponds to the *_avg_sse2 kernels declared above.
64FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
65FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
66
67// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
68//                         uint8_t *dst, ptrdiff_t dst_stride,
69//                         const int16_t *filter_x, int x_step_q4,
70//                         const int16_t *filter_y, int y_step_q4,
71//                         int w, int h);
72// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
73//                             uint8_t *dst, ptrdiff_t dst_stride,
74//                             const int16_t *filter_x, int x_step_q4,
75//                             const int16_t *filter_y, int y_step_q4,
76//                             int w, int h);
// Instantiate the two 2-D entry points prototyped in the preceding comment:
// vpx_convolve8_sse2 (first argument empty) and vpx_convolve8_avg_sse2
// (first argument "avg_"). FUN_CONV_2D is not defined in this file; it
// presumably builds the 2-D convolve on top of the 1-D functions generated
// above -- verify in vpx_dsp/x86/convolve.h.
77FUN_CONV_2D(, sse2);
78FUN_CONV_2D(avg_, sse2);
79
80#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
// High-bitdepth counterparts of the v8/h8 kernel declarations. They are only
// visible when CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 holds (see the
// enclosing #if). No definitions appear in this file; they are presumably
// provided by separately built SSE2 code -- TODO confirm where.
// highbd_filter8_1dfunction is not defined here; presumably a function
// typedef from vpx_dsp/x86/convolve.h.
// The name layout mirrors the non-highbd list: widths 16/8/4, v/h
// directions, each with and without _avg.
81highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
82highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
83highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2;
84highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2;
85highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2;
86highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2;
87highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2;
88highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2;
89highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2;
90highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2;
91highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;
92highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
93
// High-bitdepth counterparts of the v2/h2 kernel declarations, sharing the
// highbd_filter8_1dfunction type with the highbd v8/h8 kernels.
// "2" presumably denotes a 2-tap (bilinear) filter -- TODO confirm against
// the kernel implementations, which are not part of this file.
94highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2;
95highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2;
96highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2;
97highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2;
98highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
99highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
100highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2;
101highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2;
102highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2;
103highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2;
104highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;
105highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
106
107// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src,
108//                                      ptrdiff_t src_stride,
109//                                      uint8_t *dst,
110//                                      ptrdiff_t dst_stride,
111//                                      const int16_t *filter_x,
112//                                      int x_step_q4,
113//                                      const int16_t *filter_y,
114//                                      int y_step_q4,
115//                                      int w, int h, int bd);
116// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src,
117//                                     ptrdiff_t src_stride,
118//                                     uint8_t *dst,
119//                                     ptrdiff_t dst_stride,
120//                                     const int16_t *filter_x,
121//                                     int x_step_q4,
122//                                     const int16_t *filter_y,
123//                                     int y_step_q4,
124//                                     int w, int h, int bd);
125// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
126//                                          ptrdiff_t src_stride,
127//                                          uint8_t *dst,
128//                                          ptrdiff_t dst_stride,
129//                                          const int16_t *filter_x,
130//                                          int x_step_q4,
131//                                          const int16_t *filter_y,
132//                                          int y_step_q4,
133//                                          int w, int h, int bd);
134// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
135//                                         ptrdiff_t src_stride,
136//                                         uint8_t *dst,
137//                                         ptrdiff_t dst_stride,
138//                                         const int16_t *filter_x,
139//                                         int x_step_q4,
140//                                         const int16_t *filter_y,
141//                                         int y_step_q4,
142//                                         int w, int h, int bd);
// Instantiate the four high-bitdepth 1-D entry points prototyped in the
// preceding comment (vpx_highbd_convolve8_{horiz,vert,avg_horiz,avg_vert}_sse2,
// each taking a trailing "int bd"). HIGH_FUN_CONV_1D is not defined in this
// file (presumably in vpx_dsp/x86/convolve.h); check the expansion there.
// The argument pattern is the same as for the FUN_CONV_1D invocations in
// this file: horizontal variants start at src, vertical variants start at
// src - src_stride * 3, and the sixth argument is empty for the plain
// variants and "avg_" for the averaging ones.
143HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
144HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
145HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
146HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
147                 sse2);
148
149// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
150//                                uint8_t *dst, ptrdiff_t dst_stride,
151//                                const int16_t *filter_x, int x_step_q4,
152//                                const int16_t *filter_y, int y_step_q4,
153//                                int w, int h, int bd);
154// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
155//                                    uint8_t *dst, ptrdiff_t dst_stride,
156//                                    const int16_t *filter_x, int x_step_q4,
157//                                    const int16_t *filter_y, int y_step_q4,
158//                                    int w, int h, int bd);
// Instantiate the two high-bitdepth 2-D entry points prototyped in the
// preceding comment: vpx_highbd_convolve8_sse2 (first argument empty) and
// vpx_highbd_convolve8_avg_sse2 (first argument "avg_"). HIGH_FUN_CONV_2D is
// not defined in this file; presumably it composes the high-bitdepth 1-D
// functions generated above -- verify in vpx_dsp/x86/convolve.h.
159HIGH_FUN_CONV_2D(, sse2);
160HIGH_FUN_CONV_2D(avg_, sse2);
161#endif  // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
162#endif  // HAVE_SSE2
163