1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vpx_config.h" 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/encoder/vp9_variance.h" 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_pragmas.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan 16233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern unsigned int vp9_get8x8var_mmx 17233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 25233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern unsigned int vp9_get4x4var_mmx 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 27233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan 35233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance4x4_mmx( 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 4)); 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan 49233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x8_mmx( 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 57233d2500723e5594f3e7c70896ffeeef32b9c950ywan 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 6)); 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan 64233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_mse16x16_mmx( 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, sse2, sse3, var; 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, sum2, sum3; 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sum0); 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sse1, &sum1); 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1 + sse2 + sse3; 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan return var; 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan 89233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x16_mmx( 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, sse2, sse3, var; 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, sum2, sum3, avg; 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sum0); 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sse1, &sum1); 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1 + sse2 + sse3; 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1 + sum2 + sum3; 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 8)); 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan 113233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x8_mmx( 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, var; 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, avg; 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sum0); 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sse1, &sum1); 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1; 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1; 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan 134233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance8x16_mmx( 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0, sse1, var; 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0, sum1, avg; 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sum0); 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan var = sse0 + sse1; 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan avg = sum0 + sum1; 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 7)); 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 154