1ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* 2ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 4ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Use of this source code is governed by a BSD-style license 5ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * that can be found in the LICENSE file in the root of the source 6ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * tree. An additional intellectual property rights grant can be found 7ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * in the file PATENTS. All contributing project authors may 8ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * be found in the AUTHORS file in the root of the source tree. 9ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h" 12ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/encoder/vp9_variance.h" 13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_pragmas.h" 14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vpx_ports/mem.h" 15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 16ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangextern unsigned int vp9_get8x8var_mmx 17ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang( 18ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 19ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 21ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 22ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *SSE, 23ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int *Sum 24ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang); 25ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangextern unsigned int vp9_get4x4var_mmx 26ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang( 27ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 28ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 29ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 30ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 31ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *SSE, 32ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int *Sum 33ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang); 34ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 35ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangunsigned int vp9_variance4x4_mmx( 36ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 37ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 38ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 39ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 40ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *sse) { 41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int var; 42ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int avg; 43ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); 45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *sse = var; 46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 4)); 47ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 48ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 49ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangunsigned int vp9_variance8x8_mmx( 50ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 51ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 52ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 53ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 54ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *sse) { 55ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int var; 56ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int avg; 57ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 58ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); 59ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *sse = var; 60ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 61ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 6)); 62ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 63ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 64ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangunsigned int vp9_mse16x16_mmx( 65ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 66ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 67ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 68ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 69ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *sse) { 70ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int sse0, sse1, sse2, sse3, var; 71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int sum0, sum1, sum2, sum3; 72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 73ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang &sum0); 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang &sse1, &sum1); 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 82ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 83ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang var = sse0 + sse1 + sse2 + sse3; 84ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *sse = var; 85ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return var; 86ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 88ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangunsigned int vp9_variance16x16_mmx( 90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 93ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *sse) { 95ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int sse0, sse1, sse2, sse3, var; 96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int sum0, sum1, sum2, sum3, avg; 97ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang &sum0); 1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, 1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang &sse1, &sum1); 1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, 1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); 1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang var = sse0 + sse1 + sse2 + sse3; 108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang avg = sum0 + sum1 + sum2 + sum3; 109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *sse = var; 110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 8)); 111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangunsigned int vp9_variance16x8_mmx( 114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *sse) { 119ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int sse0, sse1, var; 120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int sum0, sum1, avg; 121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang &sum0); 1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang &sse1, &sum1); 126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 127ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang var = sse0 + sse1; 128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang avg = sum0 + sum1; 129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *sse = var; 130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 7)); 131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangunsigned int vp9_variance8x16_mmx( 135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *src_ptr, 136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int source_stride, 137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const unsigned char *ref_ptr, 138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int recon_stride, 139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int *sse) { 140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int sse0, sse1, var; 141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int sum0, sum1, avg; 142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang &sum0); 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); 147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang var = sse0 + sse1; 149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang avg = sum0 + sum1; 150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *sse = var; 151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (var - (((unsigned int)avg * avg) >> 7)); 153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 154