/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include <assert.h>
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vpx_config.h"
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vp9_rtcd.h"
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h"
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// Function type shared by every vp9_filter_block1d* kernel declared in this
// file. Going by the call sites in FUN_CONV_1D, the arguments are, in order:
// the source pointer, the source stride, the destination pointer, the
// destination stride, the block height and the filter coefficients.
typedef void filter8_1dfunction(const unsigned char *src_ptr,
                                const ptrdiff_t src_pitch,
                                unsigned char *output_ptr,
                                ptrdiff_t out_pitch,
                                unsigned int output_height,
                                const short *filter);
25233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// FUN_CONV_1D defines vp9_convolve8_<name>_<opt>(), a one-dimensional
// convolve entry point that hands the block to the
// vp9_filter_block1d{16,8,4}_<dir>{8,2}_<avg><opt> kernels.
//
//   name      - suffix of the generated function and of its _c fallback.
//   step_q4   - the step argument to test (x_step_q4 or y_step_q4).
//   filter    - the coefficient array to use (filter_x or filter_y).
//   dir       - direction tag pasted into the kernel names (h or v).
//   src_start - source pointer expression given to the <dir>8 kernels; it is
//               re-evaluated on every call, so it follows src as it advances.
//   avg       - empty, or avg_ to select the averaging kernels.
//   opt       - instruction-set suffix pasted into the names.
//
// The kernels are used only when the step is 16 and filter[3] is not 128.
// When filter[0], filter[1] and filter[2] are all zero the <dir>2 kernels
// run on src, otherwise the <dir>8 kernels run on src_start. The width is
// consumed in strips of 16, then 8, then 4 columns. Any width still left
// (the whole block when the kernels were skipped) is passed to
// vp9_convolve8_<name>_c().
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
  void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                   uint8_t *dst, ptrdiff_t dst_stride, \
                                   const int16_t *filter_x, int x_step_q4, \
                                   const int16_t *filter_y, int y_step_q4, \
                                   int w, int h) { \
    if (step_q4 == 16 && filter[3] != 128) { \
      if (!(filter[0] || filter[1] || filter[2])) { \
        /* filter[0..2] are all zero: use the <dir>2 kernels. */ \
        for (; w >= 16; src += 16, dst += 16, w -= 16) { \
          vp9_filter_block1d16_##dir##2_##avg##opt(src, src_stride, \
                                                   dst, dst_stride, \
                                                   h, filter); \
        } \
        for (; w >= 8; src += 8, dst += 8, w -= 8) { \
          vp9_filter_block1d8_##dir##2_##avg##opt(src, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
        for (; w >= 4; src += 4, dst += 4, w -= 4) { \
          vp9_filter_block1d4_##dir##2_##avg##opt(src, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
      } else { \
        /* At least one of filter[0..2] is set: use the <dir>8 kernels. */ \
        for (; w >= 16; src += 16, dst += 16, w -= 16) { \
          vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
                                                   dst, dst_stride, \
                                                   h, filter); \
        } \
        for (; w >= 8; src += 8, dst += 8, w -= 8) { \
          vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
        for (; w >= 4; src += 4, dst += 4, w -= 4) { \
          vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
      } \
    } \
    /* Columns not handled above go to the C implementation. */ \
    if (w != 0) { \
      vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
                               filter_x, x_step_q4, filter_y, y_step_q4, \
                               w, h); \
    } \
  }
109233d2500723e5594f3e7c70896ffeeef32b9c950ywan
// FUN_CONV_2D defines vp9_convolve8_<avg><opt>(), a two-pass 2-D convolve
// built from the 1-D functions for the same <opt>.
//
//   avg - empty, or avg_ to make the second (vertical) pass averaging.
//   opt - instruction-set suffix pasted into the names.
//
// The block may be at most 64x64 (asserted). Unless both steps are 16 the
// call is forwarded unchanged to vp9_convolve8_<avg>c(). Otherwise the
// non-averaging horizontal function writes into a scratch buffer with a
// stride of 64, and the vertical function filters that buffer into dst:
//   - if any of coefficients 0..2 of either filter is non-zero, or
//     coefficient 3 of either filter equals 128, the horizontal pass starts
//     3 rows above src and produces h + 7 rows, and the vertical pass starts
//     at row 3 of the scratch buffer;
//   - otherwise the horizontal pass starts at src and produces h + 1 rows,
//     and the vertical pass starts at row 0 of the scratch buffer.
#define FUN_CONV_2D(avg, opt) \
void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                              uint8_t *dst, ptrdiff_t dst_stride, \
                              const int16_t *filter_x, int x_step_q4, \
                              const int16_t *filter_y, int y_step_q4, \
                              int w, int h) { \
  assert(w <= 64); \
  assert(h <= 64); \
  if (x_step_q4 != 16 || y_step_q4 != 16) { \
    vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                           filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
    return; \
  } \
  if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
      filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
    /* 64 columns by (64 + 7) rows. */ \
    DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
    vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
                              fdata2, 64, \
                              filter_x, x_step_q4, filter_y, y_step_q4, \
                              w, h + 7); \
    vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, \
                                    dst, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h); \
  } else { \
    /* 64 columns by (64 + 1) rows. */ \
    DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \
    vp9_convolve8_horiz_##opt(src, src_stride, \
                              fdata2, 64, \
                              filter_x, x_step_q4, filter_y, y_step_q4, \
                              w, h + 1); \
    vp9_convolve8_##avg##vert_##opt(fdata2, 64, \
                                    dst, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h); \
  } \
}
#if HAVE_AVX2
// Only the 16-wide h8/v8 kernels are declared under an _avx2 name. Every
// other vp9_filter_block1d*_avx2 name that the macros paste together is
// aliased below to an SSSE3 kernel.
// NOTE(review): this section therefore relies on the SSSE3 kernels being
// available whenever HAVE_AVX2 is set - confirm against the build config.
filter8_1dfunction vp9_filter_block1d16_h8_avx2;
filter8_1dfunction vp9_filter_block1d16_v8_avx2;

// 8-wide h8/v8 and 4-wide h8: the *_intrin_ssse3 kernels on x86-64, the
// *_ssse3 kernels otherwise.
#if (ARCH_X86_64)
filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
#else
filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
#endif

// 4-wide v8: always the *_ssse3 kernel.
filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3

// All h2/v2 kernels: always the *_ssse3 kernels.
filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
#define vp9_filter_block1d8_h2_avx2  vp9_filter_block1d8_h2_ssse3
filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
#define vp9_filter_block1d8_v2_avx2  vp9_filter_block1d8_v2_ssse3
filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
#define vp9_filter_block1d4_h2_avx2  vp9_filter_block1d4_h2_ssse3
filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
#define vp9_filter_block1d4_v2_avx2  vp9_filter_block1d4_v2_ssse3

// The expansions below define the following functions, all with the
// parameter list
//   (const uint8_t *src, ptrdiff_t src_stride,
//    uint8_t *dst, ptrdiff_t dst_stride,
//    const int16_t *filter_x, int x_step_q4,
//    const int16_t *filter_y, int y_step_q4,
//    int w, int h):
//   void vp9_convolve8_horiz_avx2(...);
//   void vp9_convolve8_vert_avx2(...);
//   void vp9_convolve8_avx2(...);
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);

FUN_CONV_2D(, avx2);
#endif
#if HAVE_SSSE3
// Non-averaging h8/v8 kernels. On x86-64 the 16-wide and 8-wide kernels and
// the 4-wide h8 kernel are the *_intrin_ssse3 functions, reached through the
// *_ssse3 names via the aliases; elsewhere the *_ssse3 functions are
// declared directly.
#if (ARCH_X86_64)
filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3
filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3
filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
#else
filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
#endif
// The 4-wide v8 kernel is declared under its *_ssse3 name on every target.
filter8_1dfunction vp9_filter_block1d4_v8_ssse3;

// Averaging h8/v8 kernels.
filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;

// Non-averaging h2/v2 kernels.
filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
filter8_1dfunction vp9_filter_block1d4_v2_ssse3;

// Averaging h2/v2 kernels.
filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;

// The expansions below define the following functions, all with the
// parameter list
//   (const uint8_t *src, ptrdiff_t src_stride,
//    uint8_t *dst, ptrdiff_t dst_stride,
//    const int16_t *filter_x, int x_step_q4,
//    const int16_t *filter_y, int y_step_q4,
//    int w, int h):
//   void vp9_convolve8_horiz_ssse3(...);
//   void vp9_convolve8_vert_ssse3(...);
//   void vp9_convolve8_avg_horiz_ssse3(...);
//   void vp9_convolve8_avg_vert_ssse3(...);
//   void vp9_convolve8_ssse3(...);
//   void vp9_convolve8_avg_ssse3(...);
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
            ssse3);

FUN_CONV_2D(, ssse3);
FUN_CONV_2D(avg_ , ssse3);
#endif
274233d2500723e5594f3e7c70896ffeeef32b9c950ywan
#if HAVE_SSE2
// Non-averaging h8/v8 kernels.
filter8_1dfunction vp9_filter_block1d16_h8_sse2;
filter8_1dfunction vp9_filter_block1d16_v8_sse2;
filter8_1dfunction vp9_filter_block1d8_h8_sse2;
filter8_1dfunction vp9_filter_block1d8_v8_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_sse2;

// Averaging h8/v8 kernels.
filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;

// Non-averaging h2/v2 kernels.
filter8_1dfunction vp9_filter_block1d16_h2_sse2;
filter8_1dfunction vp9_filter_block1d16_v2_sse2;
filter8_1dfunction vp9_filter_block1d8_h2_sse2;
filter8_1dfunction vp9_filter_block1d8_v2_sse2;
filter8_1dfunction vp9_filter_block1d4_h2_sse2;
filter8_1dfunction vp9_filter_block1d4_v2_sse2;

// Averaging h2/v2 kernels.
filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;

// The expansions below define the following functions, all with the
// parameter list
//   (const uint8_t *src, ptrdiff_t src_stride,
//    uint8_t *dst, ptrdiff_t dst_stride,
//    const int16_t *filter_x, int x_step_q4,
//    const int16_t *filter_y, int y_step_q4,
//    int w, int h):
//   void vp9_convolve8_horiz_sse2(...);
//   void vp9_convolve8_vert_sse2(...);
//   void vp9_convolve8_avg_horiz_sse2(...);
//   void vp9_convolve8_avg_vert_sse2(...);
//   void vp9_convolve8_sse2(...);
//   void vp9_convolve8_avg_sse2(...);
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);

FUN_CONV_2D(, sse2);
FUN_CONV_2D(avg_ , sse2);
#endif
340