179f15823c34ae1e423108295e416213200bb280fAndreas Huber/* 279f15823c34ae1e423108295e416213200bb280fAndreas Huber * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 379f15823c34ae1e423108295e416213200bb280fAndreas Huber * 479f15823c34ae1e423108295e416213200bb280fAndreas Huber * Use of this source code is governed by a BSD-style license 579f15823c34ae1e423108295e416213200bb280fAndreas Huber * that can be found in the LICENSE file in the root of the source 679f15823c34ae1e423108295e416213200bb280fAndreas Huber * tree. An additional intellectual property rights grant can be found 779f15823c34ae1e423108295e416213200bb280fAndreas Huber * in the file PATENTS. All contributing project authors may 879f15823c34ae1e423108295e416213200bb280fAndreas Huber * be found in the AUTHORS file in the root of the source tree. 979f15823c34ae1e423108295e416213200bb280fAndreas Huber */ 1079f15823c34ae1e423108295e416213200bb280fAndreas Huber 111b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h" 121b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/common/variance.h" 1379f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vp8/common/pragmas.h" 1479f15823c34ae1e423108295e416213200bb280fAndreas Huber#include "vpx_ports/mem.h" 1579f15823c34ae1e423108295e416213200bb280fAndreas Huber 1679f15823c34ae1e423108295e416213200bb280fAndreas Huberextern unsigned int vp8_get16x16var_sse2 1779f15823c34ae1e423108295e416213200bb280fAndreas Huber( 1879f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 1979f15823c34ae1e423108295e416213200bb280fAndreas Huber int source_stride, 2079f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 2179f15823c34ae1e423108295e416213200bb280fAndreas Huber int recon_stride, 2279f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *SSE, 2379f15823c34ae1e423108295e416213200bb280fAndreas Huber int *Sum 2479f15823c34ae1e423108295e416213200bb280fAndreas Huber); 2579f15823c34ae1e423108295e416213200bb280fAndreas Huberextern void vp8_half_horiz_vert_variance16x_h_sse2 2679f15823c34ae1e423108295e416213200bb280fAndreas Huber( 2779f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 2879f15823c34ae1e423108295e416213200bb280fAndreas Huber int ref_pixels_per_line, 2979f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 3079f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 3179f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int Height, 3279f15823c34ae1e423108295e416213200bb280fAndreas Huber int *sum, 3379f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sumsquared 3479f15823c34ae1e423108295e416213200bb280fAndreas Huber); 3579f15823c34ae1e423108295e416213200bb280fAndreas Huberextern void vp8_half_horiz_variance16x_h_sse2 3679f15823c34ae1e423108295e416213200bb280fAndreas Huber( 3779f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 3879f15823c34ae1e423108295e416213200bb280fAndreas Huber int ref_pixels_per_line, 3979f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 4079f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 4179f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int Height, 4279f15823c34ae1e423108295e416213200bb280fAndreas Huber int *sum, 4379f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sumsquared 4479f15823c34ae1e423108295e416213200bb280fAndreas Huber); 4579f15823c34ae1e423108295e416213200bb280fAndreas Huberextern void vp8_half_vert_variance16x_h_sse2 4679f15823c34ae1e423108295e416213200bb280fAndreas Huber( 4779f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 4879f15823c34ae1e423108295e416213200bb280fAndreas Huber int ref_pixels_per_line, 4979f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 5079f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 5179f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int Height, 5279f15823c34ae1e423108295e416213200bb280fAndreas Huber int *sum, 5379f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sumsquared 5479f15823c34ae1e423108295e416213200bb280fAndreas Huber); 5579f15823c34ae1e423108295e416213200bb280fAndreas Huberextern void vp8_filter_block2d_bil_var_ssse3 5679f15823c34ae1e423108295e416213200bb280fAndreas Huber( 5779f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *ref_ptr, 5879f15823c34ae1e423108295e416213200bb280fAndreas Huber int ref_pixels_per_line, 5979f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 6079f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 6179f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int Height, 6279f15823c34ae1e423108295e416213200bb280fAndreas Huber int xoffset, 6379f15823c34ae1e423108295e416213200bb280fAndreas Huber int yoffset, 6479f15823c34ae1e423108295e416213200bb280fAndreas Huber int *sum, 6579f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sumsquared 6679f15823c34ae1e423108295e416213200bb280fAndreas Huber); 6779f15823c34ae1e423108295e416213200bb280fAndreas Huber 6879f15823c34ae1e423108295e416213200bb280fAndreas Huberunsigned int vp8_sub_pixel_variance16x16_ssse3 6979f15823c34ae1e423108295e416213200bb280fAndreas Huber( 7079f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 7179f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 7279f15823c34ae1e423108295e416213200bb280fAndreas Huber int xoffset, 7379f15823c34ae1e423108295e416213200bb280fAndreas Huber int yoffset, 7479f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *dst_ptr, 7579f15823c34ae1e423108295e416213200bb280fAndreas Huber int dst_pixels_per_line, 7679f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sse 7779f15823c34ae1e423108295e416213200bb280fAndreas Huber) 7879f15823c34ae1e423108295e416213200bb280fAndreas Huber{ 7979f15823c34ae1e423108295e416213200bb280fAndreas Huber int xsum0; 8079f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int xxsum0; 8179f15823c34ae1e423108295e416213200bb280fAndreas Huber 821b362b15af34006e6a11974088a46d42b903418eJohann /* note we could avoid these if statements if the calling function 831b362b15af34006e6a11974088a46d42b903418eJohann * just called the appropriate functions inside. 841b362b15af34006e6a11974088a46d42b903418eJohann */ 8579f15823c34ae1e423108295e416213200bb280fAndreas Huber if (xoffset == 4 && yoffset == 0) 8679f15823c34ae1e423108295e416213200bb280fAndreas Huber { 8779f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_variance16x_h_sse2( 8879f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 8979f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 9079f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 9179f15823c34ae1e423108295e416213200bb280fAndreas Huber } 9279f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 0 && yoffset == 4) 9379f15823c34ae1e423108295e416213200bb280fAndreas Huber { 9479f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_vert_variance16x_h_sse2( 9579f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 9679f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 9779f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 9879f15823c34ae1e423108295e416213200bb280fAndreas Huber } 9979f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 4 && yoffset == 4) 10079f15823c34ae1e423108295e416213200bb280fAndreas Huber { 10179f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_vert_variance16x_h_sse2( 10279f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 10379f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 10479f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 10579f15823c34ae1e423108295e416213200bb280fAndreas Huber } 10679f15823c34ae1e423108295e416213200bb280fAndreas Huber else 10779f15823c34ae1e423108295e416213200bb280fAndreas Huber { 10879f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_filter_block2d_bil_var_ssse3( 10979f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 11079f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 16, 11179f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 11279f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 11379f15823c34ae1e423108295e416213200bb280fAndreas Huber } 11479f15823c34ae1e423108295e416213200bb280fAndreas Huber 11579f15823c34ae1e423108295e416213200bb280fAndreas Huber *sse = xxsum0; 116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 11779f15823c34ae1e423108295e416213200bb280fAndreas Huber} 11879f15823c34ae1e423108295e416213200bb280fAndreas Huber 11979f15823c34ae1e423108295e416213200bb280fAndreas Huberunsigned int vp8_sub_pixel_variance16x8_ssse3 12079f15823c34ae1e423108295e416213200bb280fAndreas Huber( 12179f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *src_ptr, 12279f15823c34ae1e423108295e416213200bb280fAndreas Huber int src_pixels_per_line, 12379f15823c34ae1e423108295e416213200bb280fAndreas Huber int xoffset, 12479f15823c34ae1e423108295e416213200bb280fAndreas Huber int yoffset, 12579f15823c34ae1e423108295e416213200bb280fAndreas Huber const unsigned char *dst_ptr, 12679f15823c34ae1e423108295e416213200bb280fAndreas Huber int dst_pixels_per_line, 12779f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int *sse 12879f15823c34ae1e423108295e416213200bb280fAndreas Huber 12979f15823c34ae1e423108295e416213200bb280fAndreas Huber) 13079f15823c34ae1e423108295e416213200bb280fAndreas Huber{ 13179f15823c34ae1e423108295e416213200bb280fAndreas Huber int xsum0; 13279f15823c34ae1e423108295e416213200bb280fAndreas Huber unsigned int xxsum0; 13379f15823c34ae1e423108295e416213200bb280fAndreas Huber 13479f15823c34ae1e423108295e416213200bb280fAndreas Huber if (xoffset == 4 && yoffset == 0) 13579f15823c34ae1e423108295e416213200bb280fAndreas Huber { 13679f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_variance16x_h_sse2( 13779f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 13879f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 13979f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 14079f15823c34ae1e423108295e416213200bb280fAndreas Huber } 14179f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 0 && yoffset == 4) 14279f15823c34ae1e423108295e416213200bb280fAndreas Huber { 14379f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_vert_variance16x_h_sse2( 14479f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 14579f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 14679f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 14779f15823c34ae1e423108295e416213200bb280fAndreas Huber } 14879f15823c34ae1e423108295e416213200bb280fAndreas Huber else if (xoffset == 4 && yoffset == 4) 14979f15823c34ae1e423108295e416213200bb280fAndreas Huber { 15079f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_half_horiz_vert_variance16x_h_sse2( 15179f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 15279f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 15379f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 15479f15823c34ae1e423108295e416213200bb280fAndreas Huber } 15579f15823c34ae1e423108295e416213200bb280fAndreas Huber else 15679f15823c34ae1e423108295e416213200bb280fAndreas Huber { 15779f15823c34ae1e423108295e416213200bb280fAndreas Huber vp8_filter_block2d_bil_var_ssse3( 15879f15823c34ae1e423108295e416213200bb280fAndreas Huber src_ptr, src_pixels_per_line, 15979f15823c34ae1e423108295e416213200bb280fAndreas Huber dst_ptr, dst_pixels_per_line, 8, 16079f15823c34ae1e423108295e416213200bb280fAndreas Huber xoffset, yoffset, 16179f15823c34ae1e423108295e416213200bb280fAndreas Huber &xsum0, &xxsum0); 16279f15823c34ae1e423108295e416213200bb280fAndreas Huber } 16379f15823c34ae1e423108295e416213200bb280fAndreas Huber 16479f15823c34ae1e423108295e416213200bb280fAndreas Huber *sse = xxsum0; 165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 16679f15823c34ae1e423108295e416213200bb280fAndreas Huber} 167