# Prints the C preamble -- includes, forward struct/union declarations and the
# DEC_MVCOSTS macro -- that the prototypes declared in this file refer to.
sub vp9_common_forward_decls() {
print <<EOF
/*
 * VP9
 */

#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"

struct macroblockd;

/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;

#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
struct mv;
union int_mv;
struct yv12_buffer_config;
EOF
}
# Register the preamble printer by name.
forward_decls qw/vp9_common_forward_decls/;

# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
# Each *_x86inc variable holds its instruction-set name when x86inc is usable
# and an empty string otherwise; the values are interpolated into the
# specialize lists below.
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
  ($mmx_x86inc, $sse_x86inc, $sse2_x86inc,
   $ssse3_x86inc, $avx_x86inc, $avx2_x86inc) =
      ('mmx', 'sse', 'sse2', 'ssse3', 'avx', 'avx2');
} else {
  ($mmx_x86inc, $sse_x86inc, $sse2_x86inc,
   $ssse3_x86inc, $avx_x86inc, $avx2_x86inc) = ('') x 6;
}

# this variable is for functions that are 64 bit only.
# Same scheme as above, keyed on the target architecture instead.
if ($opts{arch} eq "x86_64") {
  ($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
      ('mmx', 'sse2', 'ssse3', 'avx', 'avx2');
} else {
  ($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
      ('') x 5;
}

#
# RECON
#

# Intra predictors, 4x4
add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;

add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_4x4/;

add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";

add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";

add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";

add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_4x4/;

add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_4x4/;

add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_4x4/;

# Intra predictors, 8x8
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;

add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_8x8/;

add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";

add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";

add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";

add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_8x8/;

add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_8x8/;

add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_8x8/;

# Intra predictors, 16x16
add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
# Specialization list for the vp9_d207_predictor_16x16 prototype declared
# immediately before this point.
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;

add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_16x16/;

add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";

add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";

add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";

add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_16x16/;

add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_16x16/;

add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_16x16/;

# Intra predictors, 32x32
add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;

add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_32x32/;

add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_32x32/;

add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";

# Note: this is the one predictor keyed on the x86_64-only variable rather
# than on the x86inc one.
add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";

add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";

add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_32x32/;

add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_32x32/;

add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_32x32/;

#
# Loopfilter
#
add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;

add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;

add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;

add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;

add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;

add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;

add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;

add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;

add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;

add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;

add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;

#
# post proc
#
# Only declared when post-processing is enabled in the build configuration.
# Each "$<function>_sse2 = '<symbol>'" assignment names a symbol whose suffix
# differs from the default (_xmm / _wmt instead of _sse2); presumably the RTCD
# generator uses it as the implementation name -- confirm against the generator.
# The names are quoted so they stay plain strings even under "strict subs" or
# if a sub with the same name is ever declared.
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
  add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_down mmx sse2/;
  $vp9_mbpost_proc_down_sse2 = 'vp9_mbpost_proc_down_xmm';

  add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_across_ip sse2/;
  $vp9_mbpost_proc_across_ip_sse2 = 'vp9_mbpost_proc_across_ip_xmm';

  add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
  specialize qw/vp9_post_proc_down_and_across mmx sse2/;
  $vp9_post_proc_down_and_across_sse2 = 'vp9_post_proc_down_and_across_xmm';

  add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
  specialize qw/vp9_plane_add_noise mmx sse2/;
  $vp9_plane_add_noise_sse2 = 'vp9_plane_add_noise_wmt';
}

add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
specialize qw/vp9_blend_mb_inner/;

add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
specialize qw/vp9_blend_mb_outer/;

add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
specialize qw/vp9_blend_b/;

#
# Sub Pixel Filters
#
add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";

add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";

add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;

add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;

add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;

add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;

add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;

add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;

#
# dct
#
add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;

add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;

add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;

add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/;

add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;

add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;

add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;

add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;

add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;

add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
# The neon entry for the 34-coefficient variant is pointed at the
# 1024-coefficient neon symbol (quoted so it is always a plain string).
$vp9_idct32x32_34_add_neon = 'vp9_idct32x32_1024_add_neon';

add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;

add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;

add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;

add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add sse2 dspr2/;

# dct and add

add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_1_add/;

add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_16_add/;

#
# Encoder functions below this point.
357# 358if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { 359 360 361# variance 362add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 363specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc"; 364 365add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 366specialize qw/vp9_variance16x32/, "$sse2_x86inc"; 367 368add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 369specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc"; 370 371add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 372specialize qw/vp9_variance32x64/, "$sse2_x86inc"; 373 374add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 375specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc"; 376 377add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 378specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; 379 380add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 381specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; 382 383add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; 384specialize qw/vp9_get_sse_sum_16x16 sse2/; 385$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2; 386 387add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 388specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; 389 390add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 391specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc"; 392 393add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 394specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc"; 395 396add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; 397specialize qw/vp9_get_sse_sum_8x8 sse2/; 398$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2; 399 400add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 401specialize qw/vp9_variance8x4/, "$sse2_x86inc"; 402 403add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 404specialize qw/vp9_variance4x8/, "$sse2_x86inc"; 405 406add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 407specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; 408 409add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 410specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 411 412add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 413specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 
414 415add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 416specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; 417 418add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 419specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; 420 421add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 422specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; 423 424add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 425specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; 426 427add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 428specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; 429 430add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 431specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; 432 433add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 434specialize qw/vp9_sub_pixel_variance16x32/, 
"$sse2_x86inc", "$ssse3_x86inc"; 435 436add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 437specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; 438 439add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 440specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 441 442add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 443specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; 444 445add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 446specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; 447 448add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 449specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; 450 451add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 452specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; 453 454add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t 
*second_pred"; 455specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; 456 457add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 458specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; 459 460add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 461specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; 462 463add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 464specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; 465 466add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 467specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; 468 469# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form 470add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 471specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; 472 473add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 474specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; 475 476add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int 
source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 477specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; 478 479add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 480specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; 481 482add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 483specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; 484#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt 485 486add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; 487specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; 488 489add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 490specialize qw/vp9_sad64x64/, "$sse2_x86inc"; 491 492add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 493specialize qw/vp9_sad32x64/, "$sse2_x86inc"; 494 495add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 496specialize qw/vp9_sad64x32/, "$sse2_x86inc"; 497 498add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 499specialize qw/vp9_sad32x16/, "$sse2_x86inc"; 500 501add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 502specialize qw/vp9_sad16x32/, "$sse2_x86inc"; 503 504add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 505specialize qw/vp9_sad32x32/, "$sse2_x86inc"; 506 507add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 508specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc"; 509 510add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 511specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; 512 513add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 514specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; 515 516add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 517specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc"; 518 519add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 520specialize qw/vp9_sad8x4/, "$sse2_x86inc"; 521 522add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 523specialize qw/vp9_sad4x8/, "$sse_x86inc"; 524 525add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; 526specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; 527 528add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; 529specialize qw/vp9_sad64x64_avg/, 
"$sse2_x86inc";

#
# vp9_sadMxN_avg family.
# Every block size takes the same argument list (note the extra second_pred
# pointer).  The 4-wide sizes are specialized with $sse_x86inc, all other
# sizes with $sse2_x86inc.
#
my $sad_avg_args = 'const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad';

add_proto qw/unsigned int vp9_sad32x64_avg/, $sad_avg_args;
specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad64x32_avg/, $sad_avg_args;
specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x16_avg/, $sad_avg_args;
specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x32_avg/, $sad_avg_args;
specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x32_avg/, $sad_avg_args;
specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x16_avg/, $sad_avg_args;
specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x8_avg/, $sad_avg_args;
specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x16_avg/, $sad_avg_args;
specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x8_avg/, $sad_avg_args;
specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x4_avg/, $sad_avg_args;
specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad4x8_avg/, $sad_avg_args;
specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";

add_proto qw/unsigned int vp9_sad4x4_avg/, $sad_avg_args;
specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";

#
# vp9_variance_halfpixvarMxN_{h,v,hv} family.
# Only the 16x16 variants list an optimized version; the 64x64 and 32x32
# variants are specialized with an empty list.
#
my $halfpixvar_args = 'const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse';

add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar64x64_h/;

add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar64x64_v/;

add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar64x64_hv/;

add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar32x32_h/;

add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar32x32_v/;

add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, $halfpixvar_args;
specialize qw/vp9_variance_halfpixvar32x32_hv/;

#
# vp9_sadMxNx3 family (results written through sad_array).
#
my $sad_x3_args = 'const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array';

add_proto qw/void vp9_sad64x64x3/, $sad_x3_args;
specialize qw/vp9_sad64x64x3/;

add_proto qw/void vp9_sad32x32x3/, $sad_x3_args;
specialize qw/vp9_sad32x32x3/;

add_proto qw/void vp9_sad16x16x3/, $sad_x3_args;
specialize qw/vp9_sad16x16x3 sse3 ssse3/;

add_proto qw/void vp9_sad16x8x3/, $sad_x3_args;
specialize qw/vp9_sad16x8x3 sse3 ssse3/;

add_proto qw/void vp9_sad8x16x3/, $sad_x3_args;
specialize qw/vp9_sad8x16x3 sse3/;

add_proto qw/void vp9_sad8x8x3/, $sad_x3_args;
specialize qw/vp9_sad8x8x3 sse3/;

add_proto qw/void vp9_sad4x4x3/, $sad_x3_args;
specialize qw/vp9_sad4x4x3 sse3/;

#
# vp9_sadMxNx8 family.
# The argument list differs from the x3 family in spelling only
# (src_stride / uint32_t *sad_array); it is kept exactly as declared.
# NOTE(review): these entries use the tag "sse4", whereas
# vp9_full_search_sad in this file uses "sse4_1" -- confirm which spelling
# the RTCD generator recognises.
#
my $sad_x8_args = 'const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array';

add_proto qw/void vp9_sad64x64x8/, $sad_x8_args;
specialize qw/vp9_sad64x64x8/;

add_proto qw/void vp9_sad32x32x8/, $sad_x8_args;
specialize qw/vp9_sad32x32x8/;

add_proto qw/void vp9_sad16x16x8/, $sad_x8_args;
specialize qw/vp9_sad16x16x8 sse4/;

add_proto qw/void vp9_sad16x8x8/, $sad_x8_args;
specialize qw/vp9_sad16x8x8 sse4/;

add_proto qw/void vp9_sad8x16x8/, $sad_x8_args;
specialize qw/vp9_sad8x16x8 sse4/;

add_proto qw/void vp9_sad8x8x8/, $sad_x8_args;
specialize qw/vp9_sad8x8x8 sse4/;

add_proto qw/void vp9_sad8x4x8/, $sad_x8_args;
specialize qw/vp9_sad8x4x8/;

add_proto qw/void vp9_sad4x8x8/, $sad_x8_args;
specialize qw/vp9_sad4x8x8/;

add_proto qw/void vp9_sad4x4x8/, $sad_x8_args;
specialize qw/vp9_sad4x4x8 sse4/;

#
# vp9_sadMxNx4d family (ref_ptr is an array of reference pointers).
#
my $sad_x4d_args = 'const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array';
$sad_x4d_args = 'const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array';

add_proto qw/void vp9_sad64x64x4d/, $sad_x4d_args;
specialize qw/vp9_sad64x64x4d sse2 avx2/;

add_proto qw/void vp9_sad32x64x4d/, $sad_x4d_args;
specialize qw/vp9_sad32x64x4d sse2/;

add_proto qw/void vp9_sad64x32x4d/, $sad_x4d_args;
specialize qw/vp9_sad64x32x4d sse2/;

add_proto qw/void vp9_sad32x16x4d/, $sad_x4d_args;
specialize qw/vp9_sad32x16x4d sse2/;

add_proto qw/void vp9_sad16x32x4d/, $sad_x4d_args;
specialize qw/vp9_sad16x32x4d sse2/;

add_proto qw/void vp9_sad32x32x4d/, $sad_x4d_args;
specialize qw/vp9_sad32x32x4d sse2 avx2/;

add_proto qw/void vp9_sad16x16x4d/, $sad_x4d_args;
specialize qw/vp9_sad16x16x4d sse2/;

add_proto qw/void vp9_sad16x8x4d/, $sad_x4d_args;
specialize qw/vp9_sad16x8x4d sse2/;

add_proto qw/void vp9_sad8x16x4d/, $sad_x4d_args;
specialize qw/vp9_sad8x16x4d sse2/;

add_proto qw/void vp9_sad8x8x4d/, $sad_x4d_args;
specialize qw/vp9_sad8x8x4d sse2/;

# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
# (vp9_sad8x4x4d already lists sse2; the two 4-wide sizes still list sse.)
add_proto qw/void vp9_sad8x4x4d/, $sad_x4d_args;
specialize qw/vp9_sad8x4x4d sse2/;

add_proto qw/void vp9_sad4x8x4d/, $sad_x4d_args;
specialize qw/vp9_sad4x8x4d sse/;

add_proto qw/void vp9_sad4x4x4d/, $sad_x4d_args;
specialize qw/vp9_sad4x4x4d sse/;

# Disabled prototype, kept for reference:
#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse";
#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/;

#
# vp9_mseMxN family.  Only 16x16 lists optimized versions.
#
my $mse_args = 'const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse';

add_proto qw/unsigned int vp9_mse16x16/, $mse_args;
specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";

add_proto qw/unsigned int vp9_mse8x16/, $mse_args;
specialize qw/vp9_mse8x16/;

add_proto qw/unsigned int vp9_mse16x8/, $mse_args;
specialize qw/vp9_mse16x8/;

add_proto qw/unsigned int vp9_mse8x8/, $mse_args;
specialize qw/vp9_mse8x8/;

#
# vp9_sub_pixel_mseMxN family (no optimized versions listed).
#
my $sub_pixel_mse_args = 'const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse';

add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, $sub_pixel_mse_args;
specialize qw/vp9_sub_pixel_mse64x64/;

add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, $sub_pixel_mse_args;
specialize qw/vp9_sub_pixel_mse32x32/;

add_proto qw/unsigned int vp9_get_mb_ss/, 'const int16_t *';
specialize qw/vp9_get_mb_ss mmx sse2/;

# ENCODEMB INVOKE

add_proto qw/int64_t vp9_block_error/, 'const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz';
specialize qw/vp9_block_error/, "$sse2_x86inc";

add_proto qw/void vp9_subtract_block/, 'int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride';
specialize qw/vp9_subtract_block/, "$sse2_x86inc";

# Both quantizers take the same argument list and are specialized through
# $ssse3_x86_64, i.e. the ssse3 version is only listed on x86_64 builds.
my $quantize_args = 'const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan';

add_proto qw/void vp9_quantize_b/, $quantize_args;
specialize qw/vp9_quantize_b/, "$ssse3_x86_64";

add_proto qw/void vp9_quantize_b_32x32/, $quantize_args;
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";

#
# Structured Similarity (SSIM)
#
# Only declared when CONFIG_INTERNAL_STATS is enabled; the sse2 versions are
# listed through $sse2_x86_64 (x86_64 builds only).
if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
    my $ssim_args = 'uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr';

    add_proto qw/void vp9_ssim_parms_8x8/, $ssim_args;
    specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64";

    add_proto qw/void vp9_ssim_parms_16x16/, $ssim_args;
    specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64";
}

# fdct functions
# The vp9_fht* prototypes carry an extra tx_type argument; vp9_fwht4x4 and
# the vp9_fdct* prototypes share the three-argument form.
my $fht_args  = 'const int16_t *input, int16_t *output, int stride, int tx_type';
my $fdct_args = 'const int16_t *input, int16_t *output, int stride';

add_proto qw/void vp9_fht4x4/, $fht_args;
specialize qw/vp9_fht4x4 sse2 avx2/;

add_proto qw/void vp9_fht8x8/, $fht_args;
specialize qw/vp9_fht8x8 sse2 avx2/;

add_proto qw/void vp9_fht16x16/, $fht_args;
specialize qw/vp9_fht16x16 sse2 avx2/;

add_proto qw/void vp9_fwht4x4/, $fdct_args;
specialize qw/vp9_fwht4x4/;

add_proto qw/void vp9_fdct4x4/, $fdct_args;
specialize qw/vp9_fdct4x4 sse2 avx2/;

add_proto qw/void vp9_fdct8x8/, $fdct_args;
specialize qw/vp9_fdct8x8 sse2 avx2/;

add_proto qw/void vp9_fdct16x16/, $fdct_args;
specialize qw/vp9_fdct16x16 sse2 avx2/;

add_proto qw/void vp9_fdct32x32/, $fdct_args;
specialize qw/vp9_fdct32x32 sse2 avx2/;

add_proto qw/void vp9_fdct32x32_rd/, $fdct_args;
specialize qw/vp9_fdct32x32_rd sse2 avx2/;

#
# Motion search
#
add_proto qw/int vp9_full_search_sad/, 'const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv';
specialize qw/vp9_full_search_sad sse3 sse4_1/;
# The values are quoted string literals rather than barewords, so the file
# still compiles under "use strict 'subs'" and can never be parsed as a call
# to a sub of the same name.
# NOTE(review): presumably these name the symbol to use for the sse3 /
# sse4_1 variants instead of a generated name, and so must stay after the
# matching specialize call -- confirm against the RTCD generator.
$vp9_full_search_sad_sse3   = 'vp9_full_search_sadx3';
$vp9_full_search_sad_sse4_1 = 'vp9_full_search_sadx8';

add_proto qw/int vp9_refining_search_sad/, 'const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv';
specialize qw/vp9_refining_search_sad sse3/;
# Quoted string literal rather than a bareword, so the file still compiles
# under "use strict 'subs'" and can never be parsed as a sub call.
# NOTE(review): presumably names the symbol to use for the sse3 variant of
# vp9_refining_search_sad; it has to stay after that function's specialize
# call -- confirm against the RTCD generator.
$vp9_refining_search_sad_sse3 = 'vp9_refining_search_sadx4';

# vp9_diamond_search_sad and vp9_full_range_search take the same arguments.
my $diamond_search_args = 'const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv';

add_proto qw/int vp9_diamond_search_sad/, $diamond_search_args;
specialize qw/vp9_diamond_search_sad sse3/;
$vp9_diamond_search_sad_sse3 = 'vp9_diamond_search_sadx4';

add_proto qw/int vp9_full_range_search/, $diamond_search_args;
specialize qw/vp9_full_range_search/;

add_proto qw/void vp9_temporal_filter_apply/, 'uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count';
specialize qw/vp9_temporal_filter_apply sse2/;

}
# end encoder functions
1;