190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/* 2f71323e297a928af368937089d3ed71239786f86Andreas Huber * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 4f71323e297a928af368937089d3ed71239786f86Andreas Huber * Use of this source code is governed by a BSD-style license 5f71323e297a928af368937089d3ed71239786f86Andreas Huber * that can be found in the LICENSE file in the root of the source 6f71323e297a928af368937089d3ed71239786f86Andreas Huber * tree. An additional intellectual property rights grant can be found 7f71323e297a928af368937089d3ed71239786f86Andreas Huber * in the file PATENTS. All contributing project authors may 8f71323e297a928af368937089d3ed71239786f86Andreas Huber * be found in the AUTHORS file in the root of the source tree. 990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber */ 1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 111b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h" 121b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/variance.h" 1379f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vp8/common/pragmas.h" 1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "vpx_ports/mem.h" 151b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/x86/filter_x86.h" 1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 171b362b15af34006e6a11974088a46d42b903418eJohannextern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 181b362b15af34006e6a11974088a46d42b903418eJohannextern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 191b362b15af34006e6a11974088a46d42b903418eJohannextern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 201b362b15af34006e6a11974088a46d42b903418eJohannextern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern void vp8_filter_block2d_bil4x4_var_mmx 2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 24538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int ref_pixels_per_line, 26538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 2890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const short *HFilter, 2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const short *VFilter, 3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *sum, 3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sumsquared 3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern unsigned int vp8_get4x4var_mmx 3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 36538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 38538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *SSE, 4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *Sum 4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_get_mb_ss_sse2 4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 46538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const short *src_ptr 4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_get16x16var_sse2 4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 50538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 51538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int source_stride, 52538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 53538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int recon_stride, 54538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *SSE, 55538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int *Sum 5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_get8x8var_sse2 5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 59538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 60538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int source_stride, 61538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 62538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int recon_stride, 63538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *SSE, 64538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int *Sum 6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vp8_filter_block2d_bil_var_sse2 6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 68538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int ref_pixels_per_line, 70538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int Height, 7379f15823c34ae1e423108295e416213200bb280fAndreas Huber int xoffset, 7479f15823c34ae1e423108295e416213200bb280fAndreas Huber int yoffset, 7579f15823c34ae1e423108295e416213200bb280fAndreas Huber int *sum, 7679f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sumsquared 7779f15823c34ae1e423108295e416213200bb280fAndreas Huber); 7879f15823c34ae1e423108295e416213200bb280fAndreas Hubervoid vp8_half_horiz_vert_variance8x_h_sse2 7979f15823c34ae1e423108295e416213200bb280fAndreas Huber( 8079f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 8179f15823c34ae1e423108295e416213200bb280fAndreas Huber int ref_pixels_per_line, 8279f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 8379f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 8479f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int Height, 8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *sum, 8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sumsquared 8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vp8_half_horiz_vert_variance16x_h_sse2 8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 90538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int ref_pixels_per_line, 92538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int Height, 9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *sum, 9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sumsquared 9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 9879f15823c34ae1e423108295e416213200bb280fAndreas Hubervoid vp8_half_horiz_variance8x_h_sse2 9979f15823c34ae1e423108295e416213200bb280fAndreas Huber( 10079f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 10179f15823c34ae1e423108295e416213200bb280fAndreas Huber int ref_pixels_per_line, 10279f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 10379f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 10479f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int Height, 10579f15823c34ae1e423108295e416213200bb280fAndreas Huber int *sum, 10679f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sumsquared 10779f15823c34ae1e423108295e416213200bb280fAndreas Huber); 10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vp8_half_horiz_variance16x_h_sse2 10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 110538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int ref_pixels_per_line, 112538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int Height, 11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *sum, 11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sumsquared 11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 11879f15823c34ae1e423108295e416213200bb280fAndreas Hubervoid vp8_half_vert_variance8x_h_sse2 11979f15823c34ae1e423108295e416213200bb280fAndreas Huber( 12079f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 12179f15823c34ae1e423108295e416213200bb280fAndreas Huber int ref_pixels_per_line, 12279f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 12379f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 12479f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int Height, 12579f15823c34ae1e423108295e416213200bb280fAndreas Huber int *sum, 12679f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sumsquared 12779f15823c34ae1e423108295e416213200bb280fAndreas Huber); 12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vp8_half_vert_variance16x_h_sse2 12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 130538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int ref_pixels_per_line, 132538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int Height, 13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int *sum, 13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sumsquared 13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber); 13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance4x4_wmt( 140538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 142538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 1431b362b15af34006e6a11974088a46d42b903418eJohann int recon_stride, 1441b362b15af34006e6a11974088a46d42b903418eJohann unsigned int *sse) 14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int var; 14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int avg; 14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 1501b362b15af34006e6a11974088a46d42b903418eJohann *sse = var; 151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 4)); 15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance8x8_wmt 15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 157538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 159538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 1601b362b15af34006e6a11974088a46d42b903418eJohann int recon_stride, 1611b362b15af34006e6a11974088a46d42b903418eJohann unsigned int *sse) 16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int var; 16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int avg; 16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 1671b362b15af34006e6a11974088a46d42b903418eJohann *sse = var; 168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 6)); 16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance16x16_wmt 17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 175538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 177538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0; 18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0; 18390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = sse0; 187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (sse0 - (((unsigned int)sum0 * sum0) >> 8)); 18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_mse16x16_wmt( 190538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 192538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 19790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0; 19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0; 19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = sse0; 20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber return sse0; 20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance16x8_wmt 20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 208538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 210538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0, sse1, var; 21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0, sum1, avg; 21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber var = sse0 + sse1; 22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber avg = sum0 + sum1; 22290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 7)); 22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_variance8x16_wmt 22890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 229538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int source_stride, 231538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *ref_ptr, 23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int recon_stride, 23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse) 23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int sse0, sse1, var; 23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int sum0, sum1, avg; 23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; 24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber var = sse0 + sse1; 24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber avg = sum0 + sum1; 24390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = var; 244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 7)); 24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance4x4_wmt 24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 250538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 254538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum; 26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum; 26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil4x4_var_mmx( 26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 2641b362b15af34006e6a11974088a46d42b903418eJohann vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum, &xxsum 26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum; 268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum - (((unsigned int)xsum * xsum) >> 4)); 26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance8x8_wmt 27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 274538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 278538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum; 28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum; 28579f15823c34ae1e423108295e416213200bb280fAndreas Huber 28679f15823c34ae1e423108295e416213200bb280fAndreas Huber if (xoffset == 4 && yoffset == 0) 28779f15823c34ae1e423108295e416213200bb280fAndreas Huber { 28879f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_variance8x_h_sse2( 28979f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 29079f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 29179f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 29279f15823c34ae1e423108295e416213200bb280fAndreas Huber } 29379f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 0 && yoffset == 4) 29479f15823c34ae1e423108295e416213200bb280fAndreas Huber { 29579f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_vert_variance8x_h_sse2( 29679f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 29779f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 29879f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 29979f15823c34ae1e423108295e416213200bb280fAndreas Huber } 30079f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 4 && yoffset == 4) 30179f15823c34ae1e423108295e416213200bb280fAndreas Huber { 30279f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_vert_variance8x_h_sse2( 30379f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 30479f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 30579f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 30679f15823c34ae1e423108295e416213200bb280fAndreas Huber } 30779f15823c34ae1e423108295e416213200bb280fAndreas Huber else 30879f15823c34ae1e423108295e416213200bb280fAndreas Huber { 30979f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_filter_block2d_bil_var_sse2( 31079f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 31179f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 31279f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 31379f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 31479f15823c34ae1e423108295e416213200bb280fAndreas Huber } 31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum; 317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum - (((unsigned int)xsum * xsum) >> 6)); 31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance16x16_wmt 32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 322538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 326538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum0, xsum1; 33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum0, xxsum1; 33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3351b362b15af34006e6a11974088a46d42b903418eJohann /* note we could avoid these if statements if the calling function 3361b362b15af34006e6a11974088a46d42b903418eJohann * just called the appropriate functions inside. 3371b362b15af34006e6a11974088a46d42b903418eJohann */ 33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber if (xoffset == 4 && yoffset == 0) 33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_half_horiz_variance16x_h_sse2( 34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 16, 34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum0, &xxsum0); 34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber else if (xoffset == 0 && yoffset == 4) 34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_half_vert_variance16x_h_sse2( 34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 16, 35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum0, &xxsum0); 35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber else if (xoffset == 4 && yoffset == 4) 35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_half_horiz_vert_variance16x_h_sse2( 35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 16, 35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum0, &xxsum0); 35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber else 36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_sse2( 36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr, src_pixels_per_line, 36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr, dst_pixels_per_line, 16, 36479f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum0, &xxsum0 36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_filter_block2d_bil_var_sse2( 36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src_ptr + 8, src_pixels_per_line, 37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber dst_ptr + 8, dst_pixels_per_line, 16, 37179f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber &xsum1, &xxsum1 37390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ); 37479f15823c34ae1e423108295e416213200bb280fAndreas Huber xsum0 += xsum1; 37579f15823c34ae1e423108295e416213200bb280fAndreas Huber xxsum0 += xxsum1; 37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum0; 379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_mse16x16_wmt( 383538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 387538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber return *sse; 39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance16x8_wmt 39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 398538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 402538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum0, xsum1; 40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum0, xxsum1; 41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41179f15823c34ae1e423108295e416213200bb280fAndreas Huber if (xoffset == 4 && yoffset == 0) 41279f15823c34ae1e423108295e416213200bb280fAndreas Huber { 41379f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_variance16x_h_sse2( 41479f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 41579f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 41679f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 41779f15823c34ae1e423108295e416213200bb280fAndreas Huber } 41879f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 0 && yoffset == 4) 41979f15823c34ae1e423108295e416213200bb280fAndreas Huber { 42079f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_vert_variance16x_h_sse2( 42179f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 42279f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 42379f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 42479f15823c34ae1e423108295e416213200bb280fAndreas Huber } 42579f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 4 && yoffset == 4) 42679f15823c34ae1e423108295e416213200bb280fAndreas Huber { 42779f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_vert_variance16x_h_sse2( 42879f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 42979f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 43079f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 43179f15823c34ae1e423108295e416213200bb280fAndreas Huber } 43279f15823c34ae1e423108295e416213200bb280fAndreas Huber else 43379f15823c34ae1e423108295e416213200bb280fAndreas Huber { 43479f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_filter_block2d_bil_var_sse2( 43579f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 43679f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 43779f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 43879f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44079f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_filter_block2d_bil_var_sse2( 44179f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr + 8, src_pixels_per_line, 44279f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr + 8, dst_pixels_per_line, 8, 44379f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 44479f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum1, &xxsum1); 44579f15823c34ae1e423108295e416213200bb280fAndreas Huber xsum0 += xsum1; 44679f15823c34ae1e423108295e416213200bb280fAndreas Huber xxsum0 += xxsum1; 44779f15823c34ae1e423108295e416213200bb280fAndreas Huber } 44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum0; 450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned int vp8_sub_pixel_variance8x16_wmt 45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 455538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int src_pixels_per_line, 45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xoffset, 45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int yoffset, 459538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int dst_pixels_per_line, 46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int *sse 46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber int xsum; 46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int xxsum; 46679f15823c34ae1e423108295e416213200bb280fAndreas Huber 46779f15823c34ae1e423108295e416213200bb280fAndreas Huber if (xoffset == 4 && yoffset == 0) 46879f15823c34ae1e423108295e416213200bb280fAndreas Huber { 46979f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_variance8x_h_sse2( 47079f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 47179f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 47279f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 47379f15823c34ae1e423108295e416213200bb280fAndreas Huber } 47479f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 0 && yoffset == 4) 47579f15823c34ae1e423108295e416213200bb280fAndreas Huber { 47679f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_vert_variance8x_h_sse2( 47779f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 47879f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 47979f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 48079f15823c34ae1e423108295e416213200bb280fAndreas Huber } 48179f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 4 && yoffset == 4) 48279f15823c34ae1e423108295e416213200bb280fAndreas Huber { 48379f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_vert_variance8x_h_sse2( 48479f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 48579f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 48679f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 48779f15823c34ae1e423108295e416213200bb280fAndreas Huber } 48879f15823c34ae1e423108295e416213200bb280fAndreas Huber else 48979f15823c34ae1e423108295e416213200bb280fAndreas Huber { 49079f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_filter_block2d_bil_var_sse2( 49179f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 49279f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 49379f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 49479f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum, &xxsum); 49579f15823c34ae1e423108295e416213200bb280fAndreas Huber } 49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *sse = xxsum; 498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum - (((unsigned int)xsum * xsum) >> 7)); 49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 501538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 502538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huberunsigned int vp8_variance_halfpixvar16x16_h_wmt( 503538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 504538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int src_pixels_per_line, 505538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 506538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int dst_pixels_per_line, 507538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *sse) 508538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber{ 50979f15823c34ae1e423108295e416213200bb280fAndreas Huber int xsum0; 51079f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int xxsum0; 511538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 512538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber vp8_half_horiz_variance16x_h_sse2( 513538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber src_ptr, src_pixels_per_line, 514538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber dst_ptr, dst_pixels_per_line, 16, 515538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber &xsum0, &xxsum0); 516538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 517538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber *sse = xxsum0; 518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 519538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber} 520538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 521538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 522538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huberunsigned int vp8_variance_halfpixvar16x16_v_wmt( 523538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 524538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int src_pixels_per_line, 525538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 526538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int dst_pixels_per_line, 527538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *sse) 528538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber{ 52979f15823c34ae1e423108295e416213200bb280fAndreas Huber int xsum0; 53079f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int xxsum0; 531538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber vp8_half_vert_variance16x_h_sse2( 532538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber src_ptr, src_pixels_per_line, 533538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber dst_ptr, dst_pixels_per_line, 16, 534538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber &xsum0, &xxsum0); 535538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 536538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber *sse = xxsum0; 537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 538538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber} 539538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 540538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 541538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huberunsigned int vp8_variance_halfpixvar16x16_hv_wmt( 542538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *src_ptr, 543538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int src_pixels_per_line, 544538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber const unsigned char *dst_ptr, 545538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber int dst_pixels_per_line, 546538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber unsigned int *sse) 547538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber{ 54879f15823c34ae1e423108295e416213200bb280fAndreas Huber int xsum0; 54979f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int xxsum0; 550538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 551538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber vp8_half_horiz_vert_variance16x_h_sse2( 552538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber src_ptr, src_pixels_per_line, 553538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber dst_ptr, dst_pixels_per_line, 16, 554538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber &xsum0, &xxsum0); 555538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 556538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber *sse = xxsum0; 557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 558538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber} 559