1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vpx_config.h" 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/encoder/vp9_variance.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_pragmas.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 16233d2500723e5594f3e7c70896ffeeef32b9c950ywan 17233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern unsigned int vp9_get4x4var_mmx 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 25233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan 27233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_get16x16var_sse2 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 36233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_get8x8var_sse2 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 45233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_half_horiz_vert_variance8x_h_sse2 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 55233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_half_horiz_vert_variance16x_h_sse2 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 57233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 65233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_half_horiz_variance8x_h_sse2 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 75233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_half_horiz_variance16x_h_sse2 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 85233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_half_vert_variance8x_h_sse2 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 95233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_half_vert_variance16x_h_sse2 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan int ref_pixels_per_line, 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int Height, 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *sum, 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sumsquared 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan 106233d2500723e5594f3e7c70896ffeeef32b9c950ywantypedef unsigned int (*get_var_sse2) ( 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan 115233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void variance_sse2(const unsigned char *src_ptr, int source_stride, 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, int recon_stride, 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, int h, unsigned int *sse, int *sum, 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan get_var_sse2 var_fn, int block_size) { 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0; 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0; 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan int i, j; 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = 0; 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sum = 0; 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (i = 0; i < h; i += block_size) { 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (j = 0; j < w; j += block_size) { 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_fn(src_ptr + source_stride * i + j, source_stride, 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse += sse0; 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sum += sum0; 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 134233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan 136233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance4x4_sse2( 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get4x4var_mmx, 4); 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 4)); 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan 151233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 154233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 155233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 156233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 158233d2500723e5594f3e7c70896ffeeef32b9c950ywan 159233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get4x4var_mmx, 4); 161233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 162233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 5)); 163233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 164233d2500723e5594f3e7c70896ffeeef32b9c950ywan 165233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, 166233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 169233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan 173233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get4x4var_mmx, 4); 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 5)); 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan 179233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x8_sse2 180233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 184233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 185233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan 189233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get8x8var_sse2, 8); 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 192233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 6)); 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan 195233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x8_sse2 196233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 197233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 198233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 199233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 200233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 201233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 202233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, 206233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get8x8var_sse2, 8); 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 208233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 209233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 210233d2500723e5594f3e7c70896ffeeef32b9c950ywan 211233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x16_sse2 212233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 213233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 215233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 216233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 217233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 218233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 219233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 220233d2500723e5594f3e7c70896ffeeef32b9c950ywan 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, 222233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get8x8var_sse2, 8); 223233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 224233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 225233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 226233d2500723e5594f3e7c70896ffeeef32b9c950ywan 227233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x16_sse2 228233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 229233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 230233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 231233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 232233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 233233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 236233d2500723e5594f3e7c70896ffeeef32b9c950ywan 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_sse2, 16); 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 8)); 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan 243233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_mse16x16_sse2( 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 248233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0; 250233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0; 251233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sum0); 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = sse0; 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse0; 255233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 256233d2500723e5594f3e7c70896ffeeef32b9c950ywan 257233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, 258233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_sse2, 16); 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 10)); 269233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 270233d2500723e5594f3e7c70896ffeeef32b9c950ywan 271233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, 272233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 273233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 274233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 275233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 276233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 277233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 278233d2500723e5594f3e7c70896ffeeef32b9c950ywan 279233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, 280233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_sse2, 16); 281233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 282233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 9)); 283233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 284233d2500723e5594f3e7c70896ffeeef32b9c950ywan 285233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, 286233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 287233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 290233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan 293233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_sse2, 16); 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 9)); 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan 299233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_sse2, 16); 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 12)); 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan 313233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_sse2, 16); 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 11)); 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan 327233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_sse2, 16); 337233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 11)); 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan 341233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define DECL(w, opt) \ 342233d2500723e5594f3e7c70896ffeeef32b9c950ywanint vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan ptrdiff_t src_stride, \ 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, int y_offset, \ 345233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, \ 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan ptrdiff_t dst_stride, \ 347233d2500723e5594f3e7c70896ffeeef32b9c950ywan int height, unsigned int *sse) 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define DECLS(opt1, opt2) \ 349233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECL(4, opt2); \ 350233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECL(8, opt1); \ 351233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECL(16, opt1) 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan 353233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECLS(sse2, sse); 354233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECLS(ssse3, ssse3); 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef DECLS 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef DECL 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 359233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, \ 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, \ 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, \ 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, \ 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, \ 365233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse_ptr) { \ 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse; \ 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_offset, dst, dst_stride, \ 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, &sse); \ 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (w > wf) { \ 371233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse2; \ 372233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan x_offset, y_offset, \ 374233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst + 16, dst_stride, \ 375233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, &sse2); \ 376233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; \ 377233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; \ 378233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (w > wf * 2) { \ 379233d2500723e5594f3e7c70896ffeeef32b9c950ywan se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ 380233d2500723e5594f3e7c70896ffeeef32b9c950ywan x_offset, y_offset, \ 381233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst + 32, dst_stride, \ 382233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, &sse2); \ 383233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; \ 384233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; \ 385233d2500723e5594f3e7c70896ffeeef32b9c950ywan se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ 386233d2500723e5594f3e7c70896ffeeef32b9c950ywan x_offset, y_offset, \ 387233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst + 48, dst_stride, \ 388233d2500723e5594f3e7c70896ffeeef32b9c950ywan h, &sse2); \ 389233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; \ 390233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; \ 391233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 392233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 393233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse_ptr = sse; \ 394233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 395233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 396233d2500723e5594f3e7c70896ffeeef32b9c950ywan 397233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define FNS(opt1, opt2) \ 398233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 399233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 400233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 401233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 402233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 403233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 404233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 405233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 406233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 407233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 408233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 409233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 410233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(4, 4, 4, 2, 2, opt2, (unsigned int)) 411233d2500723e5594f3e7c70896ffeeef32b9c950ywan 412233d2500723e5594f3e7c70896ffeeef32b9c950ywanFNS(sse2, sse); 413233d2500723e5594f3e7c70896ffeeef32b9c950ywanFNS(ssse3, ssse3); 414233d2500723e5594f3e7c70896ffeeef32b9c950ywan 415233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef FNS 416233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef FN 417233d2500723e5594f3e7c70896ffeeef32b9c950ywan 418233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define DECL(w, opt) \ 419233d2500723e5594f3e7c70896ffeeef32b9c950ywanint vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ 420233d2500723e5594f3e7c70896ffeeef32b9c950ywan ptrdiff_t src_stride, \ 421233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, int y_offset, \ 422233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, \ 423233d2500723e5594f3e7c70896ffeeef32b9c950ywan ptrdiff_t dst_stride, \ 424233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *sec, \ 425233d2500723e5594f3e7c70896ffeeef32b9c950ywan ptrdiff_t sec_stride, \ 426233d2500723e5594f3e7c70896ffeeef32b9c950ywan int height, unsigned int *sse) 427233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define DECLS(opt1, opt2) \ 428233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECL(4, opt2); \ 429233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECL(8, opt1); \ 430233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECL(16, opt1) 431233d2500723e5594f3e7c70896ffeeef32b9c950ywan 432233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECLS(sse2, sse); 433233d2500723e5594f3e7c70896ffeeef32b9c950ywanDECLS(ssse3, ssse3); 434233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef DECL 435233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef DECLS 436233d2500723e5594f3e7c70896ffeeef32b9c950ywan 437233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 438233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ 439233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, \ 440233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, \ 441233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, \ 442233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, \ 443233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, \ 444233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sseptr, \ 445233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *sec) { \ 446233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse; \ 447233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ 448233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_offset, dst, dst_stride, \ 449233d2500723e5594f3e7c70896ffeeef32b9c950ywan sec, w, h, &sse); \ 450233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (w > wf) { \ 451233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse2; \ 452233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ 453233d2500723e5594f3e7c70896ffeeef32b9c950ywan x_offset, y_offset, \ 454233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst + 16, dst_stride, \ 455233d2500723e5594f3e7c70896ffeeef32b9c950ywan sec + 16, w, h, &sse2); \ 456233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; \ 457233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; \ 458233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (w > wf * 2) { \ 459233d2500723e5594f3e7c70896ffeeef32b9c950ywan se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ 460233d2500723e5594f3e7c70896ffeeef32b9c950ywan x_offset, y_offset, \ 461233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst + 32, dst_stride, \ 462233d2500723e5594f3e7c70896ffeeef32b9c950ywan sec + 32, w, h, &sse2); \ 463233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; \ 464233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; \ 465233d2500723e5594f3e7c70896ffeeef32b9c950ywan se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ 466233d2500723e5594f3e7c70896ffeeef32b9c950ywan x_offset, y_offset, \ 467233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst + 48, dst_stride, \ 468233d2500723e5594f3e7c70896ffeeef32b9c950ywan sec + 48, w, h, &sse2); \ 469233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; \ 470233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; \ 471233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 472233d2500723e5594f3e7c70896ffeeef32b9c950ywan } \ 473233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sseptr = sse; \ 474233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 475233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 476233d2500723e5594f3e7c70896ffeeef32b9c950ywan 477233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define FNS(opt1, opt2) \ 478233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 479233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 480233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 481233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 482233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 483233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 484233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 485233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 486233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 487233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 488233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 489233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 490233d2500723e5594f3e7c70896ffeeef32b9c950ywanFN(4, 4, 4, 2, 2, opt2, (unsigned int)) 491233d2500723e5594f3e7c70896ffeeef32b9c950ywan 492233d2500723e5594f3e7c70896ffeeef32b9c950ywanFNS(sse2, sse); 493233d2500723e5594f3e7c70896ffeeef32b9c950ywanFNS(ssse3, ssse3); 494233d2500723e5594f3e7c70896ffeeef32b9c950ywan 495233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef FNS 496233d2500723e5594f3e7c70896ffeeef32b9c950ywan#undef FN 497233d2500723e5594f3e7c70896ffeeef32b9c950ywan 498233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar16x16_h_sse2( 499233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 500233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 501233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 502233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 503233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 504233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0; 505233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0; 506233d2500723e5594f3e7c70896ffeeef32b9c950ywan 507233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_half_horiz_variance16x_h_sse2( 508233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 509233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 510233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 511233d2500723e5594f3e7c70896ffeeef32b9c950ywan 512233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 513233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 514233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 515233d2500723e5594f3e7c70896ffeeef32b9c950ywan 516233d2500723e5594f3e7c70896ffeeef32b9c950ywan 517233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar16x16_v_sse2( 518233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 519233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 520233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 521233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 522233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 523233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0; 524233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0; 525233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_half_vert_variance16x_h_sse2( 526233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 527233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 528233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 529233d2500723e5594f3e7c70896ffeeef32b9c950ywan 530233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 531233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 532233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 533233d2500723e5594f3e7c70896ffeeef32b9c950ywan 534233d2500723e5594f3e7c70896ffeeef32b9c950ywan 535233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance_halfpixvar16x16_hv_sse2( 536233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 537233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_pixels_per_line, 538233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *dst_ptr, 539233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_pixels_per_line, 540233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 541233d2500723e5594f3e7c70896ffeeef32b9c950ywan int xsum0; 542233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int xxsum0; 543233d2500723e5594f3e7c70896ffeeef32b9c950ywan 544233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_half_horiz_vert_variance16x_h_sse2( 545233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr, src_pixels_per_line, 546233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr, dst_pixels_per_line, 16, 547233d2500723e5594f3e7c70896ffeeef32b9c950ywan &xsum0, &xxsum0); 548233d2500723e5594f3e7c70896ffeeef32b9c950ywan 549233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = xxsum0; 550233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 551233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 552