1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_config.h" 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8/common/variance.h" 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8/common/pragmas.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8/common/x86/filter_x86.h" 16233d2500723e5594f3e7c70896ffeeef32b9c950ywan 17233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void filter_block1d_h6_mmx 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned short *output_ptr, 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int src_pixels_per_line, 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int pixel_step, 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_height, 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_width, 25233d2500723e5594f3e7c70896ffeeef32b9c950ywan short *filter 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 27233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void filter_block1d_v6_mmx 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *src_ptr, 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *output_ptr, 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int pixels_per_line, 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int pixel_step, 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_height, 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int output_width, 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan short *filter 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan 38233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr); 39233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern unsigned int vp8_get8x8var_mmx 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 48233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern unsigned int vp8_get4x4var_mmx 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 57233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void vp8_filter_block2d_bil4x4_var_mmx 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *HFilter, 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *VFilter, 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 68233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern void vp8_filter_block2d_bil_var_mmx 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *HFilter, 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan const short *VFilter, 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan 82233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance4x4_mmx( 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 4)); 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan 98233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance8x8_mmx( 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 6)); 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan 115233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_mse16x16_mmx( 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, sse2, sse3, var; 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, sum2, sum3; 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1 + sse2 + sse3; 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan return var; 134233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan 137233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance16x16_mmx( 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, sse2, sse3, var; 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, sum2, sum3, avg; 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1 + sse2 + sse3; 154233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1 + sum2 + sum3; 155233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 156233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 8)); 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 158233d2500723e5594f3e7c70896ffeeef32b9c950ywan 159233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance16x8_mmx( 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 161233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 162233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 163233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 164233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 165233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 166233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, var; 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, avg; 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan 169233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1; 173233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1; 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan 179233d2500723e5594f3e7c70896ffeeef32b9c950ywan 180233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance8x16_mmx( 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 184233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 185233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, var; 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, avg; 189233d2500723e5594f3e7c70896ffeeef32b9c950ywan 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; 192233d2500723e5594f3e7c70896ffeeef32b9c950ywan 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1; 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1; 195233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 196233d2500723e5594f3e7c70896ffeeef32b9c950ywan 197233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 198233d2500723e5594f3e7c70896ffeeef32b9c950ywan 199233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 200233d2500723e5594f3e7c70896ffeeef32b9c950ywan 201233d2500723e5594f3e7c70896ffeeef32b9c950ywan 202233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance4x4_mmx 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 206233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 208233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 209233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 210233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 211233d2500723e5594f3e7c70896ffeeef32b9c950ywan 212233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 213233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum; 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum; 215233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil4x4_var_mmx( 216233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 217233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 218233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 219233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum 220233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum; 222233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum - (((unsigned int)xsum * xsum) >> 4)); 223233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 224233d2500723e5594f3e7c70896ffeeef32b9c950ywan 225233d2500723e5594f3e7c70896ffeeef32b9c950ywan 226233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance8x8_mmx 227233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 228233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 229233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 230233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 231233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 232233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 233233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 236233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum; 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum; 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_mmx( 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 243233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum; 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum - (((unsigned int)xsum * xsum) >> 6)); 248233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan 250233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance16x16_mmx 251233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 255233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 256233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 257233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 258233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0, xsum1; 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0, xxsum1; 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_mmx( 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 269233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 270233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0 271233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 272233d2500723e5594f3e7c70896ffeeef32b9c950ywan 273233d2500723e5594f3e7c70896ffeeef32b9c950ywan 274233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_mmx( 275233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr + 8, src_pixels_per_line, 276233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr + 8, dst_pixels_per_line, 16, 277233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 278233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum1, &xxsum1 279233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 280233d2500723e5594f3e7c70896ffeeef32b9c950ywan 281233d2500723e5594f3e7c70896ffeeef32b9c950ywan xsum0 += xsum1; 282233d2500723e5594f3e7c70896ffeeef32b9c950ywan xxsum0 += xxsum1; 283233d2500723e5594f3e7c70896ffeeef32b9c950ywan 284233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 285233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 286233d2500723e5594f3e7c70896ffeeef32b9c950ywan 287233d2500723e5594f3e7c70896ffeeef32b9c950ywan 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan 290233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_mse16x16_mmx( 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 293233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 299233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan return *sse; 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan 304233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance16x8_mmx 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0, xsum1; 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0, xxsum1; 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_mmx( 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 8, 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_mmx( 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr + 8, src_pixels_per_line, 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr + 8, dst_pixels_per_line, 8, 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum1, &xxsum1 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan xsum0 += xsum1; 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan xxsum0 += xxsum1; 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan 337233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan 341233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance8x16_mmx 342233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 345233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xoffset, 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan int yoffset, 347233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 349233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 350233d2500723e5594f3e7c70896ffeeef32b9c950ywan) 351233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum; 353233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum; 354233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_filter_block2d_bil_var_mmx( 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum, &xxsum 359233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum; 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum - (((unsigned int)xsum * xsum) >> 7)); 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan 365233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance_halfpixvar16x16_h_mmx( 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 371233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 372233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0, 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 374233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 375233d2500723e5594f3e7c70896ffeeef32b9c950ywan 376233d2500723e5594f3e7c70896ffeeef32b9c950ywan 377233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance_halfpixvar16x16_v_mmx( 378233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 379233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 380233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 381233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 382233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 383233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 384233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4, 385233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 386233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 387233d2500723e5594f3e7c70896ffeeef32b9c950ywan 388233d2500723e5594f3e7c70896ffeeef32b9c950ywan 389233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_variance_halfpixvar16x16_hv_mmx( 390233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 391233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 392233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 393233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 394233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) 395233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 396233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4, 397233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr, recon_stride, sse); 398233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 399