1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include <assert.h> 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vpx_config.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vp9_rtcd.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 16233d2500723e5594f3e7c70896ffeeef32b9c950ywan 17233d2500723e5594f3e7c70896ffeeef32b9c950ywantypedef void filter8_1dfunction ( 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan const ptrdiff_t src_pitch, 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *output_ptr, 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan ptrdiff_t out_pitch, 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_height, 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *filter 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 25233d2500723e5594f3e7c70896ffeeef32b9c950ywan 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 27233d2500723e5594f3e7c70896ffeeef32b9c950ywan void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst, ptrdiff_t dst_stride, \ 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_x, int x_step_q4, \ 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_y, int y_step_q4, \ 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, int h) { \ 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (step_q4 == 16 && filter[3] != 128) { \ 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (filter[0] || filter[1] || filter[2]) { \ 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan while (w >= 16) { \ 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_stride, \ 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, \ 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_stride, \ 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, \ 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter); \ 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += 16; \ 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += 16; \ 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan w -= 16; \ 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan while (w >= 8) { \ 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_stride, \ 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, \ 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_stride, \ 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, \ 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter); \ 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += 8; \ 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += 8; \ 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan w -= 8; \ 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan while (w >= 4) { \ 57233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_stride, \ 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, \ 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_stride, \ 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, \ 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter); \ 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += 4; \ 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += 4; \ 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan w -= 4; \ 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan } else { \ 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan while (w >= 16) { \ 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_filter_block1d16_##dir##2_##avg##opt(src, \ 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_stride, \ 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, \ 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_stride, \ 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, \ 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter); \ 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += 16; \ 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += 16; \ 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan w -= 16; \ 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan while (w >= 8) { \ 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_filter_block1d8_##dir##2_##avg##opt(src, \ 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_stride, \ 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, \ 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_stride, \ 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, \ 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter); \ 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += 8; \ 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += 8; \ 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan w -= 8; \ 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan while (w >= 4) { \ 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_filter_block1d4_##dir##2_##avg##opt(src, \ 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_stride, \ 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, \ 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_stride, \ 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, \ 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter); \ 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += 4; \ 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += 4; \ 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan w -= 4; \ 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (w) { \ 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, filter_y, y_step_q4, \ 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h); \ 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define FUN_CONV_2D(avg, opt) \ 111233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst, ptrdiff_t dst_stride, \ 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_x, int x_step_q4, \ 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_y, int y_step_q4, \ 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, int h) { \ 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan assert(w <= 64); \ 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan assert(h <= 64); \ 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (x_step_q4 == 16 && y_step_q4 == 16) { \ 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \ 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, filter_y, y_step_q4, \ 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h + 7); \ 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, filter_y, \ 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_step_q4, w, h); \ 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan } else { \ 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \ 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, filter_y, y_step_q4, \ 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h + 1); \ 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ 134233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, filter_y, \ 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_step_q4, w, h); \ 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan } else { \ 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_AVX2 143233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v8_avx2; 144233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h8_avx2; 145233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v8_ssse3; 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if (ARCH_X86_64) 147233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; 148233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; 149233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan#else 154233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v8_ssse3; 155233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h8_ssse3; 156233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h8_ssse3; 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 158233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 159233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 161233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v2_ssse3; 162233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h2_ssse3; 163233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v2_ssse3; 164233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h2_ssse3; 165233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v2_ssse3; 166233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h2_ssse3; 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 169233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 173233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 179233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, 180233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 184233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); 185233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 189233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 192233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_2D(, avx2); 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_SSSE3 195233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if (ARCH_X86_64) 196233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; 197233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; 198233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; 199233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; 200233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v8_ssse3; 201233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; 202233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 206233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan#else 208233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v8_ssse3; 209233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h8_ssse3; 210233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v8_ssse3; 211233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h8_ssse3; 212233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v8_ssse3; 213233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h8_ssse3; 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 215233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; 216233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; 217233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; 218233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; 219233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; 220233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan 222233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v2_ssse3; 223233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h2_ssse3; 224233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v2_ssse3; 225233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h2_ssse3; 226233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v2_ssse3; 227233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h2_ssse3; 228233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; 229233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; 230233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; 231233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; 232233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3; 233233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 236233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 243233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 248233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 250233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 251233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 255233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); 256233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); 257233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); 258233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssse3); 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 269233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 270233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 271233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_2D(, ssse3); 272233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_2D(avg_ , ssse3); 273233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 274233d2500723e5594f3e7c70896ffeeef32b9c950ywan 275233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_SSE2 276233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v8_sse2; 277233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h8_sse2; 278233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v8_sse2; 279233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h8_sse2; 280233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v8_sse2; 281233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h8_sse2; 282233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; 283233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; 284233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v8_avg_sse2; 285233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h8_avg_sse2; 286233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v8_avg_sse2; 287233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h8_avg_sse2; 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan 289233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v2_sse2; 290233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h2_sse2; 291233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v2_sse2; 292233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h2_sse2; 293233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v2_sse2; 294233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h2_sse2; 295233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_v2_avg_sse2; 296233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d16_h2_avg_sse2; 297233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_v2_avg_sse2; 298233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d8_h2_avg_sse2; 299233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_v2_avg_sse2; 300233d2500723e5594f3e7c70896ffeeef32b9c950ywanfilter8_1dfunction vp9_filter_block1d4_h2_avg_sse2; 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 322233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); 323233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); 324233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); 325233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan// uint8_t *dst, ptrdiff_t dst_stride, 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_x, int x_step_q4, 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan// const int16_t *filter_y, int y_step_q4, 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan// int w, int h); 337233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_2D(, sse2); 338233d2500723e5594f3e7c70896ffeeef32b9c950ywanFUN_CONV_2D(avg_ , sse2); 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 340