1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_config.h" 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8/common/variance.h" 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8/common/pragmas.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8/common/x86/filter_x86.h" 16233d2500723e5594f3e7c70896ffeeef32b9c950ywan 17233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 18233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 19233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 20233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan 22233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void vp8_filter_block2d_bil4x4_var_mmx 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 25233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 27233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *HFilter, 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *VFilter, 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan 34233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern unsigned int vp8_get4x4var_mmx 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan 44233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_get_mb_ss_sse2 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *src_ptr 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 48233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_get16x16var_sse2 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 57233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_get8x8var_sse2 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 66233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp8_filter_block2d_bil_var_sse2 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 78233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp8_half_horiz_vert_variance8x_h_sse2 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 88233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp8_half_horiz_vert_variance16x_h_sse2 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 98233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp8_half_horiz_variance8x_h_sse2 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 108233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp8_half_horiz_variance16x_h_sse2 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 118233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp8_half_vert_variance8x_h_sse2 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 128233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp8_half_vert_variance16x_h_sse2 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 134233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan 139233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance4x4_wmt( 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 4)); 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 154233d2500723e5594f3e7c70896ffeeef32b9c950ywan 155233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance8x8_wmt 156233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 158233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 159233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 161233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 162233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 163233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 164233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 165233d2500723e5594f3e7c70896ffeeef32b9c950ywan 166233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 6)); 169233d2500723e5594f3e7c70896ffeeef32b9c950ywan 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan 173233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance16x16_wmt 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 179233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 180233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0; 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0; 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan 184233d2500723e5594f3e7c70896ffeeef32b9c950ywan 185233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = sse0; 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (sse0 - (((unsigned int)sum0 * sum0) >> 8)); 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 189233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_mse16x16_wmt( 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 192233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 195233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 196233d2500723e5594f3e7c70896ffeeef32b9c950ywan 197233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0; 198233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0; 199233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 200233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = sse0; 201233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse0; 202233d2500723e5594f3e7c70896ffeeef32b9c950ywan 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan 206233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance16x8_wmt 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 208233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 209233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 210233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 211233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 212233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 213233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, var; 215233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, avg; 216233d2500723e5594f3e7c70896ffeeef32b9c950ywan 217233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 218233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 219233d2500723e5594f3e7c70896ffeeef32b9c950ywan 220233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1; 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1; 222233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 223233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 224233d2500723e5594f3e7c70896ffeeef32b9c950ywan 225233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 226233d2500723e5594f3e7c70896ffeeef32b9c950ywan 227233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance8x16_wmt 228233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 229233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 230233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 231233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 232233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 233233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, var; 236233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, avg; 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1; 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1; 243233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan 248233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance4x4_wmt 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 250233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 251233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 255233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 256233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 257233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 258233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum; 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum; 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil4x4_var_mmx( 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum; 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum - (((unsigned int)xsum * xsum) >> 4)); 269233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 270233d2500723e5594f3e7c70896ffeeef32b9c950ywan 271233d2500723e5594f3e7c70896ffeeef32b9c950ywan 272233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance8x8_wmt 273233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 274233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 275233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 276233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 277233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 278233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 279233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 280233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 281233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 282233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 283233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum; 284233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum; 285233d2500723e5594f3e7c70896ffeeef32b9c950ywan 286233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (xoffset == 4 && yoffset == 0) 287233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_variance8x_h_sse2( 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 290233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 293233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 0 && yoffset == 4) 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_vert_variance8x_h_sse2( 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 299233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 4 && yoffset == 4) 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_vert_variance8x_h_sse2( 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan else 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_sse2( 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum; 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum - (((unsigned int)xsum * xsum) >> 6)); 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan 320233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance16x16_wmt 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0, xsum1; 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0, xxsum1; 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* note we could avoid these if statements if the calling function 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan * just called the appropriate functions inside. 337233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (xoffset == 4 && yoffset == 0) 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_variance16x_h_sse2( 341233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 342233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 345233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 0 && yoffset == 4) 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 347233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_vert_variance16x_h_sse2( 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 349233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 350233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 351233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 4 && yoffset == 4) 353233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 354233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_vert_variance16x_h_sse2( 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 359233d2500723e5594f3e7c70896ffeeef32b9c950ywan else 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_sse2( 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, 365233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_sse2( 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr + 8, src_pixels_per_line, 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr + 8, dst_pixels_per_line, 16, 371233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, 372233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum1, &xxsum1 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 374233d2500723e5594f3e7c70896ffeeef32b9c950ywan xsum0 += xsum1; 375233d2500723e5594f3e7c70896ffeeef32b9c950ywan xxsum0 += xxsum1; 376233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 377233d2500723e5594f3e7c70896ffeeef32b9c950ywan 378233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 379233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 380233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 381233d2500723e5594f3e7c70896ffeeef32b9c950ywan 382233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_mse16x16_wmt( 383233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 384233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 385233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 386233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 387233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 388233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 389233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 390233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 391233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 392233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 393233d2500723e5594f3e7c70896ffeeef32b9c950ywan return *sse; 394233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 395233d2500723e5594f3e7c70896ffeeef32b9c950ywan 396233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance16x8_wmt 397233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 398233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 399233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 400233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 401233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 402233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 403233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 404233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 405233d2500723e5594f3e7c70896ffeeef32b9c950ywan 406233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 407233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 408233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0, xsum1; 409233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0, xxsum1; 410233d2500723e5594f3e7c70896ffeeef32b9c950ywan 411233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (xoffset == 4 && yoffset == 0) 412233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 413233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_variance16x_h_sse2( 414233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 415233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 416233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 417233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 418233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 0 && yoffset == 4) 419233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 420233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_vert_variance16x_h_sse2( 421233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 422233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 423233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 424233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 425233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 4 && yoffset == 4) 426233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 427233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_vert_variance16x_h_sse2( 428233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 429233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 430233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 431233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 432233d2500723e5594f3e7c70896ffeeef32b9c950ywan else 433233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 434233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_sse2( 435233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 436233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 437233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, 438233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 439233d2500723e5594f3e7c70896ffeeef32b9c950ywan 440233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_sse2( 441233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr + 8, src_pixels_per_line, 442233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr + 8, dst_pixels_per_line, 8, 443233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, 444233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum1, &xxsum1); 445233d2500723e5594f3e7c70896ffeeef32b9c950ywan xsum0 += xsum1; 446233d2500723e5594f3e7c70896ffeeef32b9c950ywan xxsum0 += xxsum1; 447233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 448233d2500723e5594f3e7c70896ffeeef32b9c950ywan 449233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 450233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 451233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 452233d2500723e5594f3e7c70896ffeeef32b9c950ywan 453233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance8x16_wmt 454233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 455233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 456233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 457233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 458233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 459233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 460233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 461233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 462233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 463233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 464233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum; 465233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum; 466233d2500723e5594f3e7c70896ffeeef32b9c950ywan 467233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (xoffset == 4 && yoffset == 0) 468233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 469233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_variance8x_h_sse2( 470233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 471233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 472233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 473233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 474233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 0 && yoffset == 4) 475233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 476233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_vert_variance8x_h_sse2( 477233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 478233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 479233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 480233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 481233d2500723e5594f3e7c70896ffeeef32b9c950ywan else if (xoffset == 4 && yoffset == 4) 482233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 483233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_vert_variance8x_h_sse2( 484233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 485233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 486233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 487233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 488233d2500723e5594f3e7c70896ffeeef32b9c950ywan else 489233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 490233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_sse2( 491233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 492233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 493233d2500723e5594f3e7c70896ffeeef32b9c950ywan xoffset, yoffset, 494233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum); 495233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 496233d2500723e5594f3e7c70896ffeeef32b9c950ywan 497233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum; 498233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum - (((unsigned int)xsum * xsum) >> 7)); 499233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 500233d2500723e5594f3e7c70896ffeeef32b9c950ywan 501233d2500723e5594f3e7c70896ffeeef32b9c950ywan 502233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance_halfpixvar16x16_h_wmt( 503233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 504233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 505233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 506233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 507233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 508233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 509233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0; 510233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0; 511233d2500723e5594f3e7c70896ffeeef32b9c950ywan 512233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_variance16x_h_sse2( 513233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 514233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 515233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 516233d2500723e5594f3e7c70896ffeeef32b9c950ywan 517233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 518233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 519233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 520233d2500723e5594f3e7c70896ffeeef32b9c950ywan 521233d2500723e5594f3e7c70896ffeeef32b9c950ywan 522233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance_halfpixvar16x16_v_wmt( 523233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 524233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 525233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 526233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 527233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 528233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 529233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0; 530233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0; 531233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_vert_variance16x_h_sse2( 532233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 533233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 534233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 535233d2500723e5594f3e7c70896ffeeef32b9c950ywan 536233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 537233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 538233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 539233d2500723e5594f3e7c70896ffeeef32b9c950ywan 540233d2500723e5594f3e7c70896ffeeef32b9c950ywan 541233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance_halfpixvar16x16_hv_wmt( 542233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 543233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 544233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 545233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 546233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 547233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 548233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0; 549233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0; 550233d2500723e5594f3e7c70896ffeeef32b9c950ywan 551233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_half_horiz_vert_variance16x_h_sse2( 552233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 553233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 554233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 555233d2500723e5594f3e7c70896ffeeef32b9c950ywan 556233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 557233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 558233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 559