190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/* 2f71323e297a928af368937089d3ed71239786f86Andreas Huber * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 4f71323e297a928af368937089d3ed71239786f86Andreas Huber * Use of this source code is governed by a BSD-style license 5f71323e297a928af368937089d3ed71239786f86Andreas Huber * that can be found in the LICENSE file in the root of the source 6f71323e297a928af368937089d3ed71239786f86Andreas Huber * tree. An additional intellectual property rights grant can be found 7f71323e297a928af368937089d3ed71239786f86Andreas Huber * in the file PATENTS. All contributing project authors may 8f71323e297a928af368937089d3ed71239786f86Andreas Huber * be found in the AUTHORS file in the root of the source tree. 990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber */ 1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 111b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h" 121b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/variance.h" 1379f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vp8/common/pragmas.h" 1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "vpx_ports/mem.h" 151b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/x86/filter_x86.h" 1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern void filter_block1d_h6_mmx 1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 19538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned short *output_ptr, 2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int src_pixels_per_line, 2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int pixel_step, 2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int output_height, 2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int output_width, 251b362b15af34006e6a11974088a46d42b903418eJohann short *filter 2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern void filter_block1d_v6_mmx 2890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 29538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const short *src_ptr, 3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *output_ptr, 3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int pixels_per_line, 3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int pixel_step, 3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int output_height, 3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int output_width, 351b362b15af34006e6a11974088a46d42b903418eJohann short *filter 3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 381b362b15af34006e6a11974088a46d42b903418eJohannextern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr); 3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern unsigned int vp8_get8x8var_mmx 4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 41538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 43538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *SSE, 4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *Sum 4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern unsigned int vp8_get4x4var_mmx 4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 50538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 52538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *SSE, 5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *Sum 5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern void vp8_filter_block2d_bil4x4_var_mmx 5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 59538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int ref_pixels_per_line, 61538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const short *HFilter, 6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const short *VFilter, 6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *sum, 6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sumsquared 6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern void vp8_filter_block2d_bil_var_mmx 6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 70538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int ref_pixels_per_line, 72538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int Height, 7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const short *HFilter, 7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const short *VFilter, 7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *sum, 7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sumsquared 7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance4x4_mmx( 83538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 85538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int var; 9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int avg; 9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 4)); 9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance8x8_mmx( 99538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 101538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int var; 10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int avg; 10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 6)); 11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_mse16x16_mmx( 116538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 118538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0, sse1, sse2, sse3, var; 12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0, sum1, sum2, sum3; 12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber var = sse0 + sse1 + sse2 + sse3; 13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber return var; 13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance16x16_mmx( 138538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 140538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 1421b362b15af34006e6a11974088a46d42b903418eJohann unsigned int *sse) 14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0, sse1, sse2, sse3, var; 14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0, sum1, sum2, sum3, avg; 14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber var = sse0 + sse1 + sse2 + sse3; 15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber avg = sum0 + sum1 + sum2 + sum3; 15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 8)); 15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance16x8_mmx( 160538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 162538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0, sse1, var; 16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0, sum1, avg; 16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber var = sse0 + sse1; 17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber avg = sum0 + sum1; 17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 7)); 17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance8x16_mmx( 181538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 183538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0, sse1, var; 18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0, sum1, avg; 18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 19090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; 19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber var = sse0 + sse1; 19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber avg = sum0 + sum1; 19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 7)); 19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance4x4_mmx 20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 204538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 208538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum; 21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum; 21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil4x4_var_mmx( 21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 2181b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum, &xxsum 22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum; 222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum - (((unsigned int)xsum * xsum) >> 4)); 22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance8x8_mmx 22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 228538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 232538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum; 23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum; 24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_mmx( 24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 8, 2431b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum, &xxsum 24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum; 247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum - (((unsigned int)xsum * xsum) >> 6)); 24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance16x16_mmx 25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 252538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 256538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum0, xsum1; 26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum0, xxsum1; 26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_mmx( 26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 16, 2691b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum0, &xxsum0 27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_mmx( 27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr + 8, src_pixels_per_line, 27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr + 8, dst_pixels_per_line, 16, 2771b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum1, &xxsum1 27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xsum0 += xsum1; 28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xxsum0 += xxsum1; 28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum0; 285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 29090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_mse16x16_mmx( 291538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 29290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 295538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber return *sse; 30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance16x8_mmx 30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 306538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 310538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum0, xsum1; 31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum0, xxsum1; 31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_mmx( 32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 8, 3221b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum0, &xxsum0 32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_mmx( 32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr + 8, src_pixels_per_line, 32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr + 8, dst_pixels_per_line, 8, 3301b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum1, &xxsum1 33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xsum0 += xsum1; 33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xxsum0 += xxsum1; 33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum0; 338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance8x16_mmx 34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 343538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 347538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 3491b362b15af34006e6a11974088a46d42b903418eJohann unsigned int *sse 35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum; 35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum; 35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_mmx( 35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 16, 3571b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum, &xxsum 35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum; 361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum - (((unsigned int)xsum * xsum) >> 7)); 36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 363538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 364538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 365538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huberunsigned int vp8_variance_halfpixvar16x16_h_mmx( 366538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 367538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int source_stride, 368538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 369538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int recon_stride, 370538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *sse) 371538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber{ 372538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0, 373538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber ref_ptr, recon_stride, sse); 374538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber} 375538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 376538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 377538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huberunsigned int vp8_variance_halfpixvar16x16_v_mmx( 378538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 379538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int source_stride, 380538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 381538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int recon_stride, 382538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *sse) 383538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber{ 384538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4, 385538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber ref_ptr, recon_stride, sse); 386538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber} 387538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 388538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 389538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huberunsigned int vp8_variance_halfpixvar16x16_hv_mmx( 390538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 391538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int source_stride, 392538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 393538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int recon_stride, 394538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *sse) 395538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber{ 396538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4, 397538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber ref_ptr, recon_stride, sse); 398538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber} 399