1sub vp9_common_forward_decls() {
2print <<EOF
3/*
4 * VP9
5 */
6
7#include "vpx/vpx_integer.h"
8#include "vp9/common/vp9_enums.h"
9
10struct macroblockd;
11
12/* Encoder forward decls */
13struct macroblock;
14struct vp9_variance_vtable;
15
16#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
17struct mv;
18union int_mv;
19struct yv12_buffer_config;
20EOF
21}
22forward_decls qw/vp9_common_forward_decls/;
23
24# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
25if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
26  $mmx_x86inc = 'mmx';
27  $sse_x86inc = 'sse';
28  $sse2_x86inc = 'sse2';
29  $ssse3_x86inc = 'ssse3';
30  $avx_x86inc = 'avx';
31  $avx2_x86inc = 'avx2';
32} else {
33  $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
34  $avx_x86inc = $avx2_x86inc = '';
35}
36
37# this variable is for functions that are 64 bit only.
38if ($opts{arch} eq "x86_64") {
39  $mmx_x86_64 = 'mmx';
40  $sse2_x86_64 = 'sse2';
41  $ssse3_x86_64 = 'ssse3';
42  $avx_x86_64 = 'avx';
43  $avx2_x86_64 = 'avx2';
44} else {
45  $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
46  $avx_x86_64 = $avx2_x86_64 = '';
47}
48
49#
50# RECON
51#
52add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
53specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";
54
55add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
56specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";
57
58add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
59specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
60
61add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
62specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
63
64add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
65specialize qw/vp9_d117_predictor_4x4/;
66
67add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
68specialize qw/vp9_d135_predictor_4x4/;
69
70add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
71specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
72
73add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
74specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
75
76add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
77specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
78
79add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
80specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
81
82add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
83specialize qw/vp9_dc_top_predictor_4x4/;
84
85add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
86specialize qw/vp9_dc_left_predictor_4x4/;
87
88add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
89specialize qw/vp9_dc_128_predictor_4x4/;
90
91add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
92specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
93
94add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
95specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";
96
97add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
98specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
99
100add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
101specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
102
103add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
104specialize qw/vp9_d117_predictor_8x8/;
105
106add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
107specialize qw/vp9_d135_predictor_8x8/;
108
109add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
110specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
111
112add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
113specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
114
115add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
116specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
117
118add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
119specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
120
121add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
122specialize qw/vp9_dc_top_predictor_8x8/;
123
124add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
125specialize qw/vp9_dc_left_predictor_8x8/;
126
127add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
128specialize qw/vp9_dc_128_predictor_8x8/;
129
130add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
131specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
132
133add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
134specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
135
136add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
137specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
138
139add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
140specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
141
142add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
143specialize qw/vp9_d117_predictor_16x16/;
144
145add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
146specialize qw/vp9_d135_predictor_16x16/;
147
148add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
149specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
150
151add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
152specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
153
154add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
155specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
156
157add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
158specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
159
160add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
161specialize qw/vp9_dc_top_predictor_16x16/;
162
163add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
164specialize qw/vp9_dc_left_predictor_16x16/;
165
166add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
167specialize qw/vp9_dc_128_predictor_16x16/;
168
169add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
170specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";
171
172add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
173specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";
174
175add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
176specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
177
178add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
179specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
180
181add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
182specialize qw/vp9_d117_predictor_32x32/;
183
184add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
185specialize qw/vp9_d135_predictor_32x32/;
186
187add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
188specialize qw/vp9_d153_predictor_32x32/;
189
190add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
191specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
192
193add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
194specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
195
196add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
197specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
198
199add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
200specialize qw/vp9_dc_top_predictor_32x32/;
201
202add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
203specialize qw/vp9_dc_left_predictor_32x32/;
204
205add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
206specialize qw/vp9_dc_128_predictor_32x32/;
207
208#
209# Loopfilter
210#
211add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
212specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;
213
214add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
215specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;
216
217add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
218specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
219
220add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
221specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
222
223add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
224specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
225
226add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
227specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
228
229add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
230specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;
231
232add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
233specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
234
235add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
236specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
237
238add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
239specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
240
241add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
242specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
243
244#
245# post proc
246#
247if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
248add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
249specialize qw/vp9_mbpost_proc_down mmx sse2/;
250$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm;
251
252add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
253specialize qw/vp9_mbpost_proc_across_ip sse2/;
254$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm;
255
256add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
257specialize qw/vp9_post_proc_down_and_across mmx sse2/;
258$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
259
260add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
261specialize qw/vp9_plane_add_noise mmx sse2/;
262$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
263}
264
265add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
266specialize qw/vp9_blend_mb_inner/;
267
268add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
269specialize qw/vp9_blend_mb_outer/;
270
271add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
272specialize qw/vp9_blend_b/;
273
274#
275# Sub Pixel Filters
276#
277add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
278specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";
279
280add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
281specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
282
283add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
284specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;
285
286add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
287specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;
288
289add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
290specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;
291
292add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
293specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
294
295add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
296specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
297
298add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
299specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
300
301#
302# dct
303#
304add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
305specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
306
307add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
308specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
309
310add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
311specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
312
313add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
314specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/;
315
316add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
317specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
318
319add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
320specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
321
322add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
323specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;
324
325add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
326specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;
327
328add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
329specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
330
331add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
332specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
333$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon;
334
335add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
336specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
337
338add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
339specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
340
341add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
342specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
343
344add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
345specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
346
347# dct and add
348
349add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
350specialize qw/vp9_iwht4x4_1_add/;
351
352add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
353specialize qw/vp9_iwht4x4_16_add/;
354
355#
356# Encoder functions below this point.
357#
358if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
359
360
361# variance
362add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
363specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc";
364
365add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
366specialize qw/vp9_variance16x32/, "$sse2_x86inc";
367
368add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
369specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc";
370
371add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
372specialize qw/vp9_variance32x64/, "$sse2_x86inc";
373
374add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
375specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc";
376
377add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
378specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
379
380add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
381specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
382
383add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
384specialize qw/vp9_get_sse_sum_16x16 sse2/;
385$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
386
387add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
388specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
389
390add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
391specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";
392
393add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
394specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
395
396add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
397specialize qw/vp9_get_sse_sum_8x8 sse2/;
398$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
399
400add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
401specialize qw/vp9_variance8x4/, "$sse2_x86inc";
402
403add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
404specialize qw/vp9_variance4x8/, "$sse2_x86inc";
405
406add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
407specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
408
409add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
410specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
411
412add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
413specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
414
415add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
416specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
417
418add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
419specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
420
421add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
422specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
423
424add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
425specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
426
427add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
428specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
429
430add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
431specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
432
433add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
434specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
435
436add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
437specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
438
439add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
440specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
441
442add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
443specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
444
445add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
446specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
447
448add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
449specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
450
451add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
452specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
453
454add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
455specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
456
457add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
458specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
459
460add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
461specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
462
463add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
464specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
465
466add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
467specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
468
469# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
470add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
471specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
472
473add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
474specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
475
476add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
477specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
478
479add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
480specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
481
482add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
483specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
484#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
485
486add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
487specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
488
489add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
490specialize qw/vp9_sad64x64/, "$sse2_x86inc";
491
492add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
493specialize qw/vp9_sad32x64/, "$sse2_x86inc";
494
495add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
496specialize qw/vp9_sad64x32/, "$sse2_x86inc";
497
498add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
499specialize qw/vp9_sad32x16/, "$sse2_x86inc";
500
501add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
502specialize qw/vp9_sad16x32/, "$sse2_x86inc";
503
504add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
505specialize qw/vp9_sad32x32/, "$sse2_x86inc";
506
507add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
508specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc";
509
510add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
511specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc";
512
513add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
514specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc";
515
516add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
517specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc";
518
519add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
520specialize qw/vp9_sad8x4/, "$sse2_x86inc";
521
522add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
523specialize qw/vp9_sad4x8/, "$sse_x86inc";
524
525add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
526specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc";
527
528add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
529specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
530
531add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
532specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
533
534add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
535specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
536
537add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
538specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
539
540add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
541specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
542
543add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
544specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
545
546add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
547specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
548
549add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
550specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";
551
552add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
553specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";
554
555add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
556specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";
557
558add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
559specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";
560
561add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
562specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
563
564add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
565specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
566
567add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
568specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";
569
570add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
571specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";
572
573add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
574specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";
575
576add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
577specialize qw/vp9_variance_halfpixvar64x64_h/;
578
579add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
580specialize qw/vp9_variance_halfpixvar64x64_v/;
581
582add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
583specialize qw/vp9_variance_halfpixvar64x64_hv/;
584
585add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
586specialize qw/vp9_variance_halfpixvar32x32_h/;
587
588add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
589specialize qw/vp9_variance_halfpixvar32x32_v/;
590
591add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
592specialize qw/vp9_variance_halfpixvar32x32_hv/;
593
594add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
595specialize qw/vp9_sad64x64x3/;
596
597add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
598specialize qw/vp9_sad32x32x3/;
599
600add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
601specialize qw/vp9_sad16x16x3 sse3 ssse3/;
602
603add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
604specialize qw/vp9_sad16x8x3 sse3 ssse3/;
605
606add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
607specialize qw/vp9_sad8x16x3 sse3/;
608
609add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
610specialize qw/vp9_sad8x8x3 sse3/;
611
612add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
613specialize qw/vp9_sad4x4x3 sse3/;
614
615add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
616specialize qw/vp9_sad64x64x8/;
617
618add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
619specialize qw/vp9_sad32x32x8/;
620
621add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
622specialize qw/vp9_sad16x16x8 sse4/;
623
624add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
625specialize qw/vp9_sad16x8x8 sse4/;
626
627add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
628specialize qw/vp9_sad8x16x8 sse4/;
629
630add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
631specialize qw/vp9_sad8x8x8 sse4/;
632
633add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
634specialize qw/vp9_sad8x4x8/;
635
636add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
637specialize qw/vp9_sad4x8x8/;
638
639add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
640specialize qw/vp9_sad4x4x8 sse4/;
641
642add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
643specialize qw/vp9_sad64x64x4d sse2 avx2/;
644
645add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
646specialize qw/vp9_sad32x64x4d sse2/;
647
648add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
649specialize qw/vp9_sad64x32x4d sse2/;
650
651add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
652specialize qw/vp9_sad32x16x4d sse2/;
653
654add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
655specialize qw/vp9_sad16x32x4d sse2/;
656
657add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
658specialize qw/vp9_sad32x32x4d sse2 avx2/;
659
660add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
661specialize qw/vp9_sad16x16x4d sse2/;
662
663add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
664specialize qw/vp9_sad16x8x4d sse2/;
665
666add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
667specialize qw/vp9_sad8x16x4d sse2/;
668
669add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
670specialize qw/vp9_sad8x8x4d sse2/;
671
672# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
673add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
674specialize qw/vp9_sad8x4x4d sse2/;
675
676add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
677specialize qw/vp9_sad4x8x4d sse/;
678
679add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
680specialize qw/vp9_sad4x4x4d sse/;
681
682#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int  src_pixels_per_line, int  xoffset, int  yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse";
683#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/;
684
685add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
686specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
687
688add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
689specialize qw/vp9_mse8x16/;
690
691add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
692specialize qw/vp9_mse16x8/;
693
694add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
695specialize qw/vp9_mse8x8/;
696
697add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
698specialize qw/vp9_sub_pixel_mse64x64/;
699
700add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
701specialize qw/vp9_sub_pixel_mse32x32/;
702
703add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
704specialize qw/vp9_get_mb_ss mmx sse2/;
705# ENCODEMB INVOKE
706
707add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
708specialize qw/vp9_block_error/, "$sse2_x86inc";
709
710add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
711specialize qw/vp9_subtract_block/, "$sse2_x86inc";
712
713add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
714specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
715
716add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
717specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
718
719#
720# Structured Similarity (SSIM)
721#
722if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
723    add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
724    specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64";
725
726    add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
727    specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64";
728}
729
730# fdct functions
731add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type";
732specialize qw/vp9_fht4x4 sse2 avx2/;
733
734add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type";
735specialize qw/vp9_fht8x8 sse2 avx2/;
736
737add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
738specialize qw/vp9_fht16x16 sse2 avx2/;
739
740add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
741specialize qw/vp9_fwht4x4/;
742
743add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
744specialize qw/vp9_fdct4x4 sse2 avx2/;
745
746add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
747specialize qw/vp9_fdct8x8 sse2 avx2/;
748
749add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
750specialize qw/vp9_fdct16x16 sse2 avx2/;
751
752add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
753specialize qw/vp9_fdct32x32 sse2 avx2/;
754
755add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride";
756specialize qw/vp9_fdct32x32_rd sse2 avx2/;
757
758#
759# Motion search
760#
761add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv";
762specialize qw/vp9_full_search_sad sse3 sse4_1/;
763$vp9_full_search_sad_sse3=vp9_full_search_sadx3;
764$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
765
766add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
767specialize qw/vp9_refining_search_sad sse3/;
768$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
769
770add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
771specialize qw/vp9_diamond_search_sad sse3/;
772$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
773
774add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
775specialize qw/vp9_full_range_search/;
776
777add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
778specialize qw/vp9_temporal_filter_apply sse2/;
779
780}
781# end encoder functions
7821;
783