1/* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <assert.h> 12 13#include "./vpx_config.h" 14#include "./vp9_rtcd.h" 15#include "vpx_ports/mem.h" 16 17typedef void filter8_1dfunction ( 18 const unsigned char *src_ptr, 19 const ptrdiff_t src_pitch, 20 unsigned char *output_ptr, 21 ptrdiff_t out_pitch, 22 unsigned int output_height, 23 const short *filter 24); 25 26#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 27 void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ 28 uint8_t *dst, ptrdiff_t dst_stride, \ 29 const int16_t *filter_x, int x_step_q4, \ 30 const int16_t *filter_y, int y_step_q4, \ 31 int w, int h) { \ 32 if (step_q4 == 16 && filter[3] != 128) { \ 33 if (filter[0] || filter[1] || filter[2]) { \ 34 while (w >= 16) { \ 35 vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ 36 src_stride, \ 37 dst, \ 38 dst_stride, \ 39 h, \ 40 filter); \ 41 src += 16; \ 42 dst += 16; \ 43 w -= 16; \ 44 } \ 45 while (w >= 8) { \ 46 vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ 47 src_stride, \ 48 dst, \ 49 dst_stride, \ 50 h, \ 51 filter); \ 52 src += 8; \ 53 dst += 8; \ 54 w -= 8; \ 55 } \ 56 while (w >= 4) { \ 57 vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ 58 src_stride, \ 59 dst, \ 60 dst_stride, \ 61 h, \ 62 filter); \ 63 src += 4; \ 64 dst += 4; \ 65 w -= 4; \ 66 } \ 67 } else { \ 68 while (w >= 16) { \ 69 vp9_filter_block1d16_##dir##2_##avg##opt(src, \ 70 src_stride, \ 71 dst, \ 72 dst_stride, \ 73 h, \ 74 filter); \ 75 src += 16; \ 76 dst += 16; \ 77 w -= 16; \ 78 } \ 79 while (w >= 8) { \ 80 vp9_filter_block1d8_##dir##2_##avg##opt(src, \ 81 src_stride, \ 82 dst, \ 83 dst_stride, \ 84 h, \ 85 filter); \ 86 src += 8; \ 87 dst += 8; \ 88 w -= 8; \ 89 } \ 90 while (w >= 4) { \ 91 vp9_filter_block1d4_##dir##2_##avg##opt(src, \ 92 src_stride, \ 93 dst, \ 94 dst_stride, \ 95 h, \ 96 filter); \ 97 src += 4; \ 98 dst += 4; \ 99 w -= 4; \ 100 } \ 101 } \ 102 } \ 103 if (w) { \ 104 vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ 105 filter_x, x_step_q4, filter_y, y_step_q4, \ 106 w, h); \ 107 } \ 108} 109 110#define FUN_CONV_2D(avg, opt) \ 111void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ 112 uint8_t *dst, ptrdiff_t dst_stride, \ 113 const int16_t *filter_x, int x_step_q4, \ 114 const int16_t *filter_y, int y_step_q4, \ 115 int w, int h) { \ 116 assert(w <= 64); \ 117 assert(h <= 64); \ 118 if (x_step_q4 == 16 && y_step_q4 == 16) { \ 119 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ 120 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ 121 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \ 122 vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ 123 filter_x, x_step_q4, filter_y, y_step_q4, \ 124 w, h + 7); \ 125 vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ 126 filter_x, x_step_q4, filter_y, \ 127 y_step_q4, w, h); \ 128 } else { \ 129 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \ 130 vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ 131 filter_x, x_step_q4, filter_y, y_step_q4, \ 132 w, h + 1); \ 133 vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ 134 filter_x, x_step_q4, filter_y, \ 135 y_step_q4, w, h); \ 136 } \ 137 } else { \ 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ 140 } \ 141} 142 143#if CONFIG_VP9_HIGHBITDEPTH 144 145typedef void high_filter8_1dfunction ( 146 const uint16_t *src_ptr, 147 const ptrdiff_t src_pitch, 148 uint16_t *output_ptr, 149 ptrdiff_t out_pitch, 150 unsigned int output_height, 151 const int16_t *filter, 152 int bd 153); 154 155#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 156 void vp9_high_convolve8_##name##_##opt(const uint8_t *src8, \ 157 ptrdiff_t src_stride, \ 158 uint8_t *dst8, ptrdiff_t dst_stride, \ 159 const int16_t *filter_x, \ 160 int x_step_q4, \ 161 const int16_t *filter_y, \ 162 int y_step_q4, \ 163 int w, int h, int bd) { \ 164 if (step_q4 == 16 && filter[3] != 128) { \ 165 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ 166 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 167 if (filter[0] || filter[1] || filter[2]) { \ 168 while (w >= 16) { \ 169 vp9_high_filter_block1d16_##dir##8_##avg##opt(src_start, \ 170 src_stride, \ 171 dst, \ 172 dst_stride, \ 173 h, \ 174 filter, \ 175 bd); \ 176 src += 16; \ 177 dst += 16; \ 178 w -= 16; \ 179 } \ 180 while (w >= 8) { \ 181 vp9_high_filter_block1d8_##dir##8_##avg##opt(src_start, \ 182 src_stride, \ 183 dst, \ 184 dst_stride, \ 185 h, \ 186 filter, \ 187 bd); \ 188 src += 8; \ 189 dst += 8; \ 190 w -= 8; \ 191 } \ 192 while (w >= 4) { \ 193 vp9_high_filter_block1d4_##dir##8_##avg##opt(src_start, \ 194 src_stride, \ 195 dst, \ 196 dst_stride, \ 197 h, \ 198 filter, \ 199 bd); \ 200 src += 4; \ 201 dst += 4; \ 202 w -= 4; \ 203 } \ 204 } else { \ 205 while (w >= 16) { \ 206 vp9_high_filter_block1d16_##dir##2_##avg##opt(src, \ 207 src_stride, \ 208 dst, \ 209 dst_stride, \ 210 h, \ 211 filter, \ 212 bd); \ 213 src += 16; \ 214 dst += 16; \ 215 w -= 16; \ 216 } \ 217 while (w >= 8) { \ 218 vp9_high_filter_block1d8_##dir##2_##avg##opt(src, \ 219 src_stride, \ 220 dst, \ 221 dst_stride, \ 222 h, \ 223 filter, \ 224 bd); \ 225 src += 8; \ 226 dst += 8; \ 227 w -= 8; \ 228 } \ 229 while (w >= 4) { \ 230 vp9_high_filter_block1d4_##dir##2_##avg##opt(src, \ 231 src_stride, \ 232 dst, \ 233 dst_stride, \ 234 h, \ 235 filter, \ 236 bd); \ 237 src += 4; \ 238 dst += 4; \ 239 w -= 4; \ 240 } \ 241 } \ 242 } \ 243 if (w) { \ 244 vp9_high_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ 245 filter_x, x_step_q4, filter_y, y_step_q4, \ 246 w, h, bd); \ 247 } \ 248} 249 250#define HIGH_FUN_CONV_2D(avg, opt) \ 251void vp9_high_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ 252 uint8_t *dst, ptrdiff_t dst_stride, \ 253 const int16_t *filter_x, int x_step_q4, \ 254 const int16_t *filter_y, int y_step_q4, \ 255 int w, int h, int bd) { \ 256 assert(w <= 64); \ 257 assert(h <= 64); \ 258 if (x_step_q4 == 16 && y_step_q4 == 16) { \ 259 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ 260 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ 261 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 71); \ 262 vp9_high_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ 263 CONVERT_TO_BYTEPTR(fdata2), 64, \ 264 filter_x, x_step_q4, filter_y, y_step_q4, \ 265 w, h + 7, bd); \ 266 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ 267 64, dst, dst_stride, \ 268 filter_x, x_step_q4, filter_y, \ 269 y_step_q4, w, h, bd); \ 270 } else { \ 271 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 65); \ 272 vp9_high_convolve8_horiz_##opt(src, src_stride, \ 273 CONVERT_TO_BYTEPTR(fdata2), 64, \ 274 filter_x, x_step_q4, filter_y, y_step_q4, \ 275 w, h + 1, bd); \ 276 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ 277 dst, dst_stride, \ 278 filter_x, x_step_q4, filter_y, \ 279 y_step_q4, w, h, bd); \ 280 } \ 281 } else { \ 282 vp9_high_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ 283 filter_x, x_step_q4, filter_y, y_step_q4, w, \ 284 h, bd); \ 285 } \ 286} 287#endif // CONFIG_VP9_HIGHBITDEPTH 288 289#if HAVE_AVX2 && HAVE_SSSE3 290filter8_1dfunction vp9_filter_block1d16_v8_avx2; 291filter8_1dfunction vp9_filter_block1d16_h8_avx2; 292filter8_1dfunction vp9_filter_block1d4_v8_ssse3; 293#if ARCH_X86_64 294filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; 295filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; 296filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; 297#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 298#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 299#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 300#else // ARCH_X86 301filter8_1dfunction vp9_filter_block1d8_v8_ssse3; 302filter8_1dfunction vp9_filter_block1d8_h8_ssse3; 303filter8_1dfunction vp9_filter_block1d4_h8_ssse3; 304#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 305#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 306#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 307#endif // ARCH_X86_64 / ARCH_X86 308filter8_1dfunction vp9_filter_block1d16_v2_ssse3; 309filter8_1dfunction vp9_filter_block1d16_h2_ssse3; 310filter8_1dfunction vp9_filter_block1d8_v2_ssse3; 311filter8_1dfunction vp9_filter_block1d8_h2_ssse3; 312filter8_1dfunction vp9_filter_block1d4_v2_ssse3; 313filter8_1dfunction vp9_filter_block1d4_h2_ssse3; 314#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 315#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 316#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 317#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 318#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 319#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 320#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 321// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, 322// uint8_t *dst, ptrdiff_t dst_stride, 323// const int16_t *filter_x, int x_step_q4, 324// const int16_t *filter_y, int y_step_q4, 325// int w, int h); 326// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, 327// uint8_t *dst, ptrdiff_t dst_stride, 328// const int16_t *filter_x, int x_step_q4, 329// const int16_t *filter_y, int y_step_q4, 330// int w, int h); 331FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); 332FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); 333 334// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, 335// uint8_t *dst, ptrdiff_t dst_stride, 336// const int16_t *filter_x, int x_step_q4, 337// const int16_t *filter_y, int y_step_q4, 338// int w, int h); 339FUN_CONV_2D(, avx2); 340#endif // HAVE_AX2 && HAVE_SSSE3 341#if HAVE_SSSE3 342#if ARCH_X86_64 343filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; 344filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; 345filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; 346filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; 347filter8_1dfunction vp9_filter_block1d4_v8_ssse3; 348filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; 349#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 350#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 351#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 352#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 353#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 354#else // ARCH_X86 355filter8_1dfunction vp9_filter_block1d16_v8_ssse3; 356filter8_1dfunction vp9_filter_block1d16_h8_ssse3; 357filter8_1dfunction vp9_filter_block1d8_v8_ssse3; 358filter8_1dfunction vp9_filter_block1d8_h8_ssse3; 359filter8_1dfunction vp9_filter_block1d4_v8_ssse3; 360filter8_1dfunction vp9_filter_block1d4_h8_ssse3; 361#endif // ARCH_X86_64 / ARCH_X86 362filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; 363filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; 364filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; 365filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; 366filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; 367filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; 368 369filter8_1dfunction vp9_filter_block1d16_v2_ssse3; 370filter8_1dfunction vp9_filter_block1d16_h2_ssse3; 371filter8_1dfunction vp9_filter_block1d8_v2_ssse3; 372filter8_1dfunction vp9_filter_block1d8_h2_ssse3; 373filter8_1dfunction vp9_filter_block1d4_v2_ssse3; 374filter8_1dfunction vp9_filter_block1d4_h2_ssse3; 375filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; 376filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; 377filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; 378filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; 379filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3; 380filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; 381 382// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 383// uint8_t *dst, ptrdiff_t dst_stride, 384// const int16_t *filter_x, int x_step_q4, 385// const int16_t *filter_y, int y_step_q4, 386// int w, int h); 387// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 388// uint8_t *dst, ptrdiff_t dst_stride, 389// const int16_t *filter_x, int x_step_q4, 390// const int16_t *filter_y, int y_step_q4, 391// int w, int h); 392// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 393// uint8_t *dst, ptrdiff_t dst_stride, 394// const int16_t *filter_x, int x_step_q4, 395// const int16_t *filter_y, int y_step_q4, 396// int w, int h); 397// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 398// uint8_t *dst, ptrdiff_t dst_stride, 399// const int16_t *filter_x, int x_step_q4, 400// const int16_t *filter_y, int y_step_q4, 401// int w, int h); 402FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); 403FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); 404FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); 405FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, 406 ssse3); 407 408// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, 409// uint8_t *dst, ptrdiff_t dst_stride, 410// const int16_t *filter_x, int x_step_q4, 411// const int16_t *filter_y, int y_step_q4, 412// int w, int h); 413// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, 414// uint8_t *dst, ptrdiff_t dst_stride, 415// const int16_t *filter_x, int x_step_q4, 416// const int16_t *filter_y, int y_step_q4, 417// int w, int h); 418FUN_CONV_2D(, ssse3); 419FUN_CONV_2D(avg_ , ssse3); 420#endif // HAVE_SSSE3 421 422#if HAVE_SSE2 423filter8_1dfunction vp9_filter_block1d16_v8_sse2; 424filter8_1dfunction vp9_filter_block1d16_h8_sse2; 425filter8_1dfunction vp9_filter_block1d8_v8_sse2; 426filter8_1dfunction vp9_filter_block1d8_h8_sse2; 427filter8_1dfunction vp9_filter_block1d4_v8_sse2; 428filter8_1dfunction vp9_filter_block1d4_h8_sse2; 429filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; 430filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; 431filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2; 432filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2; 433filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2; 434filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2; 435 436filter8_1dfunction vp9_filter_block1d16_v2_sse2; 437filter8_1dfunction vp9_filter_block1d16_h2_sse2; 438filter8_1dfunction vp9_filter_block1d8_v2_sse2; 439filter8_1dfunction vp9_filter_block1d8_h2_sse2; 440filter8_1dfunction vp9_filter_block1d4_v2_sse2; 441filter8_1dfunction vp9_filter_block1d4_h2_sse2; 442filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2; 443filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2; 444filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2; 445filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2; 446filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2; 447filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2; 448 449// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 450// uint8_t *dst, ptrdiff_t dst_stride, 451// const int16_t *filter_x, int x_step_q4, 452// const int16_t *filter_y, int y_step_q4, 453// int w, int h); 454// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 455// uint8_t *dst, ptrdiff_t dst_stride, 456// const int16_t *filter_x, int x_step_q4, 457// const int16_t *filter_y, int y_step_q4, 458// int w, int h); 459// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 460// uint8_t *dst, ptrdiff_t dst_stride, 461// const int16_t *filter_x, int x_step_q4, 462// const int16_t *filter_y, int y_step_q4, 463// int w, int h); 464// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 465// uint8_t *dst, ptrdiff_t dst_stride, 466// const int16_t *filter_x, int x_step_q4, 467// const int16_t *filter_y, int y_step_q4, 468// int w, int h); 469FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); 470FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); 471FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); 472FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); 473 474// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, 475// uint8_t *dst, ptrdiff_t dst_stride, 476// const int16_t *filter_x, int x_step_q4, 477// const int16_t *filter_y, int y_step_q4, 478// int w, int h); 479// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, 480// uint8_t *dst, ptrdiff_t dst_stride, 481// const int16_t *filter_x, int x_step_q4, 482// const int16_t *filter_y, int y_step_q4, 483// int w, int h); 484FUN_CONV_2D(, sse2); 485FUN_CONV_2D(avg_ , sse2); 486 487#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 488high_filter8_1dfunction vp9_high_filter_block1d16_v8_sse2; 489high_filter8_1dfunction vp9_high_filter_block1d16_h8_sse2; 490high_filter8_1dfunction vp9_high_filter_block1d8_v8_sse2; 491high_filter8_1dfunction vp9_high_filter_block1d8_h8_sse2; 492high_filter8_1dfunction vp9_high_filter_block1d4_v8_sse2; 493high_filter8_1dfunction vp9_high_filter_block1d4_h8_sse2; 494high_filter8_1dfunction vp9_high_filter_block1d16_v8_avg_sse2; 495high_filter8_1dfunction vp9_high_filter_block1d16_h8_avg_sse2; 496high_filter8_1dfunction vp9_high_filter_block1d8_v8_avg_sse2; 497high_filter8_1dfunction vp9_high_filter_block1d8_h8_avg_sse2; 498high_filter8_1dfunction vp9_high_filter_block1d4_v8_avg_sse2; 499high_filter8_1dfunction vp9_high_filter_block1d4_h8_avg_sse2; 500 501high_filter8_1dfunction vp9_high_filter_block1d16_v2_sse2; 502high_filter8_1dfunction vp9_high_filter_block1d16_h2_sse2; 503high_filter8_1dfunction vp9_high_filter_block1d8_v2_sse2; 504high_filter8_1dfunction vp9_high_filter_block1d8_h2_sse2; 505high_filter8_1dfunction vp9_high_filter_block1d4_v2_sse2; 506high_filter8_1dfunction vp9_high_filter_block1d4_h2_sse2; 507high_filter8_1dfunction vp9_high_filter_block1d16_v2_avg_sse2; 508high_filter8_1dfunction vp9_high_filter_block1d16_h2_avg_sse2; 509high_filter8_1dfunction vp9_high_filter_block1d8_v2_avg_sse2; 510high_filter8_1dfunction vp9_high_filter_block1d8_h2_avg_sse2; 511high_filter8_1dfunction vp9_high_filter_block1d4_v2_avg_sse2; 512high_filter8_1dfunction vp9_high_filter_block1d4_h2_avg_sse2; 513 514// void vp9_high_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 515// uint8_t *dst, ptrdiff_t dst_stride, 516// const int16_t *filter_x, int x_step_q4, 517// const int16_t *filter_y, int y_step_q4, 518// int w, int h, int bd); 519// void vp9_high_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 520// uint8_t *dst, ptrdiff_t dst_stride, 521// const int16_t *filter_x, int x_step_q4, 522// const int16_t *filter_y, int y_step_q4, 523// int w, int h, int bd); 524// void vp9_high_convolve8_avg_horiz_sse2(const uint8_t *src, 525// ptrdiff_t src_stride, 526// uint8_t *dst, ptrdiff_t dst_stride, 527// const int16_t *filter_x, 528// int x_step_q4, 529// const int16_t *filter_y, 530// int y_step_q4, 531// int w, int h, int bd); 532// void vp9_high_convolve8_avg_vert_sse2(const uint8_t *src, 533// ptrdiff_t src_stride, 534// uint8_t *dst, ptrdiff_t dst_stride, 535// const int16_t *filter_x, int x_step_q4, 536// const int16_t *filter_y, int y_step_q4, 537// int w, int h, int bd); 538HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); 539HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); 540HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); 541HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, 542 sse2); 543 544// void vp9_high_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, 545// uint8_t *dst, ptrdiff_t dst_stride, 546// const int16_t *filter_x, int x_step_q4, 547// const int16_t *filter_y, int y_step_q4, 548// int w, int h, int bd); 549// void vp9_high_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, 550// uint8_t *dst, ptrdiff_t dst_stride, 551// const int16_t *filter_x, int x_step_q4, 552// const int16_t *filter_y, int y_step_q4, 553// int w, int h, int bd); 554HIGH_FUN_CONV_2D(, sse2); 555HIGH_FUN_CONV_2D(avg_ , sse2); 556#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 557#endif // HAVE_SSE2 558