1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "vpx_config.h" 12#include "vp8/common/variance.h" 13#include "vpx_ports/mem.h" 14#include "vp8/common/x86/filter_x86.h" 15 16extern void filter_block1d_h6_mmx 17( 18 const unsigned char *src_ptr, 19 unsigned short *output_ptr, 20 unsigned int src_pixels_per_line, 21 unsigned int pixel_step, 22 unsigned int output_height, 23 unsigned int output_width, 24 short *filter 25); 26extern void filter_block1d_v6_mmx 27( 28 const short *src_ptr, 29 unsigned char *output_ptr, 30 unsigned int pixels_per_line, 31 unsigned int pixel_step, 32 unsigned int output_height, 33 unsigned int output_width, 34 short *filter 35); 36 37extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr); 38extern unsigned int vp8_get8x8var_mmx 39( 40 const unsigned char *src_ptr, 41 int source_stride, 42 const unsigned char *ref_ptr, 43 int recon_stride, 44 unsigned int *SSE, 45 int *Sum 46); 47extern unsigned int vp8_get4x4var_mmx 48( 49 const unsigned char *src_ptr, 50 int source_stride, 51 const unsigned char *ref_ptr, 52 int recon_stride, 53 unsigned int *SSE, 54 int *Sum 55); 56extern void vp8_filter_block2d_bil4x4_var_mmx 57( 58 const unsigned char *ref_ptr, 59 int ref_pixels_per_line, 60 const unsigned char *src_ptr, 61 int src_pixels_per_line, 62 const short *HFilter, 63 const short *VFilter, 64 int *sum, 65 unsigned int *sumsquared 66); 67extern void vp8_filter_block2d_bil_var_mmx 68( 69 const unsigned char *ref_ptr, 70 int ref_pixels_per_line, 71 const unsigned char *src_ptr, 72 int src_pixels_per_line, 73 unsigned int Height, 74 const short *HFilter, 75 const short *VFilter, 76 int *sum, 77 unsigned int *sumsquared 78); 79 80 81unsigned int vp8_variance4x4_mmx( 82 const unsigned char *src_ptr, 83 int source_stride, 84 const unsigned char *ref_ptr, 85 int recon_stride, 86 unsigned int *sse) 87{ 88 unsigned int var; 89 int avg; 90 91 vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 92 *sse = var; 93 return (var - (((unsigned int)avg * avg) >> 4)); 94 95} 96 97unsigned int vp8_variance8x8_mmx( 98 const unsigned char *src_ptr, 99 int source_stride, 100 const unsigned char *ref_ptr, 101 int recon_stride, 102 unsigned int *sse) 103{ 104 unsigned int var; 105 int avg; 106 107 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 108 *sse = var; 109 110 return (var - (((unsigned int)avg * avg) >> 6)); 111 112} 113 114unsigned int vp8_mse16x16_mmx( 115 const unsigned char *src_ptr, 116 int source_stride, 117 const unsigned char *ref_ptr, 118 int recon_stride, 119 unsigned int *sse) 120{ 121 unsigned int sse0, sse1, sse2, sse3, var; 122 int sum0, sum1, sum2, sum3; 123 124 125 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 126 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 127 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 128 vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 129 130 var = sse0 + sse1 + sse2 + sse3; 131 *sse = var; 132 return var; 133} 134 135 136unsigned int vp8_variance16x16_mmx( 137 const unsigned char *src_ptr, 138 int source_stride, 139 const unsigned char *ref_ptr, 140 int recon_stride, 141 unsigned int *sse) 142{ 143 unsigned int sse0, sse1, sse2, sse3, var; 144 int sum0, sum1, sum2, sum3, avg; 145 146 147 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 148 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 149 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 150 vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 151 152 var = sse0 + sse1 + sse2 + sse3; 153 avg = sum0 + sum1 + sum2 + sum3; 154 *sse = var; 155 return (var - (((unsigned int)avg * avg) >> 8)); 156} 157 158unsigned int vp8_variance16x8_mmx( 159 const unsigned char *src_ptr, 160 int source_stride, 161 const unsigned char *ref_ptr, 162 int recon_stride, 163 unsigned int *sse) 164{ 165 unsigned int sse0, sse1, var; 166 int sum0, sum1, avg; 167 168 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 169 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 170 171 var = sse0 + sse1; 172 avg = sum0 + sum1; 173 *sse = var; 174 return (var - (((unsigned int)avg * avg) >> 7)); 175 176} 177 178 179unsigned int vp8_variance8x16_mmx( 180 const unsigned char *src_ptr, 181 int source_stride, 182 const unsigned char *ref_ptr, 183 int recon_stride, 184 unsigned int *sse) 185{ 186 unsigned int sse0, sse1, var; 187 int sum0, sum1, avg; 188 189 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 190 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; 191 192 var = sse0 + sse1; 193 avg = sum0 + sum1; 194 *sse = var; 195 196 return (var - (((unsigned int)avg * avg) >> 7)); 197 198} 199 200 201unsigned int vp8_sub_pixel_variance4x4_mmx 202( 203 const unsigned char *src_ptr, 204 int src_pixels_per_line, 205 int xoffset, 206 int yoffset, 207 const unsigned char *dst_ptr, 208 int dst_pixels_per_line, 209 unsigned int *sse) 210 211{ 212 int xsum; 213 unsigned int xxsum; 214 vp8_filter_block2d_bil4x4_var_mmx( 215 src_ptr, src_pixels_per_line, 216 dst_ptr, dst_pixels_per_line, 217 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 218 &xsum, &xxsum 219 ); 220 *sse = xxsum; 221 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); 222} 223 224 225unsigned int vp8_sub_pixel_variance8x8_mmx 226( 227 const unsigned char *src_ptr, 228 int src_pixels_per_line, 229 int xoffset, 230 int yoffset, 231 const unsigned char *dst_ptr, 232 int dst_pixels_per_line, 233 unsigned int *sse 234) 235{ 236 237 int xsum; 238 unsigned int xxsum; 239 vp8_filter_block2d_bil_var_mmx( 240 src_ptr, src_pixels_per_line, 241 dst_ptr, dst_pixels_per_line, 8, 242 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 243 &xsum, &xxsum 244 ); 245 *sse = xxsum; 246 return (xxsum - (((unsigned int)xsum * xsum) >> 6)); 247} 248 249unsigned int vp8_sub_pixel_variance16x16_mmx 250( 251 const unsigned char *src_ptr, 252 int src_pixels_per_line, 253 int xoffset, 254 int yoffset, 255 const unsigned char *dst_ptr, 256 int dst_pixels_per_line, 257 unsigned int *sse 258) 259{ 260 261 int xsum0, xsum1; 262 unsigned int xxsum0, xxsum1; 263 264 265 vp8_filter_block2d_bil_var_mmx( 266 src_ptr, src_pixels_per_line, 267 dst_ptr, dst_pixels_per_line, 16, 268 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 269 &xsum0, &xxsum0 270 ); 271 272 273 vp8_filter_block2d_bil_var_mmx( 274 src_ptr + 8, src_pixels_per_line, 275 dst_ptr + 8, dst_pixels_per_line, 16, 276 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 277 &xsum1, &xxsum1 278 ); 279 280 xsum0 += xsum1; 281 xxsum0 += xxsum1; 282 283 *sse = xxsum0; 284 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 285 286 287} 288 289unsigned int vp8_sub_pixel_mse16x16_mmx( 290 const unsigned char *src_ptr, 291 int src_pixels_per_line, 292 int xoffset, 293 int yoffset, 294 const unsigned char *dst_ptr, 295 int dst_pixels_per_line, 296 unsigned int *sse 297) 298{ 299 vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 300 return *sse; 301} 302 303unsigned int vp8_sub_pixel_variance16x8_mmx 304( 305 const unsigned char *src_ptr, 306 int src_pixels_per_line, 307 int xoffset, 308 int yoffset, 309 const unsigned char *dst_ptr, 310 int dst_pixels_per_line, 311 unsigned int *sse 312) 313{ 314 int xsum0, xsum1; 315 unsigned int xxsum0, xxsum1; 316 317 318 vp8_filter_block2d_bil_var_mmx( 319 src_ptr, src_pixels_per_line, 320 dst_ptr, dst_pixels_per_line, 8, 321 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 322 &xsum0, &xxsum0 323 ); 324 325 326 vp8_filter_block2d_bil_var_mmx( 327 src_ptr + 8, src_pixels_per_line, 328 dst_ptr + 8, dst_pixels_per_line, 8, 329 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 330 &xsum1, &xxsum1 331 ); 332 333 xsum0 += xsum1; 334 xxsum0 += xxsum1; 335 336 *sse = xxsum0; 337 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 338} 339 340unsigned int vp8_sub_pixel_variance8x16_mmx 341( 342 const unsigned char *src_ptr, 343 int src_pixels_per_line, 344 int xoffset, 345 int yoffset, 346 const unsigned char *dst_ptr, 347 int dst_pixels_per_line, 348 unsigned int *sse 349) 350{ 351 int xsum; 352 unsigned int xxsum; 353 vp8_filter_block2d_bil_var_mmx( 354 src_ptr, src_pixels_per_line, 355 dst_ptr, dst_pixels_per_line, 16, 356 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 357 &xsum, &xxsum 358 ); 359 *sse = xxsum; 360 return (xxsum - (((unsigned int)xsum * xsum) >> 7)); 361} 362 363 364unsigned int vp8_variance_halfpixvar16x16_h_mmx( 365 const unsigned char *src_ptr, 366 int source_stride, 367 const unsigned char *ref_ptr, 368 int recon_stride, 369 unsigned int *sse) 370{ 371 return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0, 372 ref_ptr, recon_stride, sse); 373} 374 375 376unsigned int vp8_variance_halfpixvar16x16_v_mmx( 377 const unsigned char *src_ptr, 378 int source_stride, 379 const unsigned char *ref_ptr, 380 int recon_stride, 381 unsigned int *sse) 382{ 383 return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4, 384 ref_ptr, recon_stride, sse); 385} 386 387 388unsigned int vp8_variance_halfpixvar16x16_hv_mmx( 389 const unsigned char *src_ptr, 390 int source_stride, 391 const unsigned char *ref_ptr, 392 int recon_stride, 393 unsigned int *sse) 394{ 395 return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4, 396 ref_ptr, recon_stride, sse); 397} 398