16fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org/* 26fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 36fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org * 46fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org * Use of this source code is governed by a BSD-style license 56fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org * that can be found in the LICENSE file in the root of the source 66fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org * tree. An additional intellectual property rights grant can be found 76fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org * in the file PATENTS. All contributing project authors may 86fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org * be found in the AUTHORS file in the root of the source tree. 96fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org */ 106fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 11d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org#include <emmintrin.h> // SSE2 12d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 13ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.org#include "./vpx_config.h" 14d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 156fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org#include "vp9/encoder/vp9_variance.h" 166fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org#include "vpx_ports/mem.h" 176fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 1888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgtypedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride, 1988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const unsigned char *ref, int ref_stride, 2088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org unsigned int *sse, int *sum); 
216fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 22d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgunsigned int vp9_get_mb_ss_sse2(const int16_t *src) { 23d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __m128i vsum = _mm_setzero_si128(); 24d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int i; 256fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 26d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < 32; ++i) { 27d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i v = _mm_loadu_si128((const __m128i *)src); 28d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); 29d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src += 8; 30d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 3110a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 32d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); 33d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); 34d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org return _mm_cvtsi128_si32(vsum); 35d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org} 36d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 37d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org#define READ64(p, stride, i) \ 38d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \ 39d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride))) 40d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 41d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgunsigned int 
vp9_get4x4var_sse2(const uint8_t *src, int src_stride, 42d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const uint8_t *ref, int ref_stride, 43d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned int *sse, int *sum) { 44d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i zero = _mm_setzero_si128(); 45d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero); 46d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero); 47d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero); 48d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero); 49d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i diff0 = _mm_sub_epi16(src0, ref0); 50d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i diff1 = _mm_sub_epi16(src1, ref1); 51d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 52d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // sum 53d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __m128i vsum = _mm_add_epi16(diff0, diff1); 54d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); 55d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); 56d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); 57d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org *sum = (int16_t)_mm_extract_epi16(vsum, 0); 58d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
59d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // sse 60d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0), 61d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org _mm_madd_epi16(diff1, diff1)); 62d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); 63d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); 64d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org *sse = _mm_cvtsi128_si32(vsum); 65d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 66d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org return 0; 67d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org} 68d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 69d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgunsigned int vp9_get8x8var_sse2(const uint8_t *src, int src_stride, 70d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const uint8_t *ref, int ref_stride, 71d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned int *sse, int *sum) { 72d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i zero = _mm_setzero_si128(); 73d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __m128i vsum = _mm_setzero_si128(); 74d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __m128i vsse = _mm_setzero_si128(); 75d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int i; 76d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 77d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < 8; i += 2) { 78d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i src0 = _mm_unpacklo_epi8(_mm_loadl_epi64( 
79d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org (const __m128i *)(src + i * src_stride)), zero); 80d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i ref0 = _mm_unpacklo_epi8(_mm_loadl_epi64( 81d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org (const __m128i *)(ref + i * ref_stride)), zero); 82d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i diff0 = _mm_sub_epi16(src0, ref0); 83d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 84d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i src1 = _mm_unpacklo_epi8(_mm_loadl_epi64( 85d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org (const __m128i *)(src + (i + 1) * src_stride)), zero); 86d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i ref1 = _mm_unpacklo_epi8(_mm_loadl_epi64( 87d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org (const __m128i *)(ref + (i + 1) * ref_stride)), zero); 88d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i diff1 = _mm_sub_epi16(src1, ref1); 89d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 90d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, diff0); 91d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, diff1); 92d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); 93d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); 94d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 95d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 96d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // sum 97d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, 
_mm_srli_si128(vsum, 8)); 98d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); 99d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); 100d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org *sum = (int16_t)_mm_extract_epi16(vsum, 0); 101d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 102d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // sse 103d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); 104d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); 105d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org *sse = _mm_cvtsi128_si32(vsse); 106d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 107d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org return 0; 108d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org} 109d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 110d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgunsigned int vp9_get16x16var_sse2(const uint8_t *src, int src_stride, 111d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const uint8_t *ref, int ref_stride, 112d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned int *sse, int *sum) { 113d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i zero = _mm_setzero_si128(); 114d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __m128i vsum = _mm_setzero_si128(); 115d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org __m128i vsse = _mm_setzero_si128(); 116d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int i; 117d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
118d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < 16; ++i) { 119d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i s = _mm_loadu_si128((const __m128i *)src); 120d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i r = _mm_loadu_si128((const __m128i *)ref); 121d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 122d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i src0 = _mm_unpacklo_epi8(s, zero); 123d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i ref0 = _mm_unpacklo_epi8(r, zero); 124d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i diff0 = _mm_sub_epi16(src0, ref0); 125d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 126d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i src1 = _mm_unpackhi_epi8(s, zero); 127d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i ref1 = _mm_unpackhi_epi8(r, zero); 128d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const __m128i diff1 = _mm_sub_epi16(src1, ref1); 129d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 130d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, diff0); 131d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, diff1); 132d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); 133d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); 134d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 135d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org src += src_stride; 136d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org ref += ref_stride; 
137d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 138d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 139d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // sum 140d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); 141d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); 142d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org *sum = (int16_t)_mm_extract_epi16(vsum, 0) + 143d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org (int16_t)_mm_extract_epi16(vsum, 1); 144d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 145d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org // sse 146d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); 147d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); 148d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org *sse = _mm_cvtsi128_si32(vsse); 149d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 150d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org return 0; 151d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org} 15288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org 15388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org 15488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgstatic void variance_sse2(const unsigned char *src, int src_stride, 15588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const unsigned char *ref, int ref_stride, 15688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int w, int h, unsigned int *sse, int *sum, 15788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_fn_t var_fn, int block_size) { 
15810a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org int i, j; 15910a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 16010a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org *sse = 0; 16110a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org *sum = 0; 16210a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 16310a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org for (i = 0; i < h; i += block_size) { 16410a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org for (j = 0; j < w; j += block_size) { 16588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org unsigned int sse0; 16688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum0; 16788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org var_fn(src + src_stride * i + j, src_stride, 16888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org ref + ref_stride * i + j, ref_stride, &sse0, &sum0); 16910a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org *sse += sse0; 17010a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org *sum += sum0; 17110a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org } 17210a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org } 17310a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 17410a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 17588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride, 17688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const unsigned char *ref, int ref_stride, 17788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org unsigned int *sse) { 17888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 179d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vp9_get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); 18088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org 
return *sse - (((unsigned int)sum * sum) >> 4); 18110a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 18210a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 18388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride, 18488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 18510a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 18688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 18788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 8, 4, 188d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org sse, &sum, vp9_get4x4var_sse2, 4); 18988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((unsigned int)sum * sum) >> 5); 19010a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 19110a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 19288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride, 19388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 19410a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 19588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 19688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 4, 8, 197d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org sse, &sum, vp9_get4x4var_sse2, 4); 19888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((unsigned int)sum * sum) >> 5); 1996fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org} 2006fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 20188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int 
vp9_variance8x8_sse2(const unsigned char *src, int src_stride, 20288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const unsigned char *ref, int ref_stride, 20388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org unsigned int *sse) { 20488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 205d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vp9_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); 20688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((unsigned int)sum * sum) >> 6); 2076fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org} 2086fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 20988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride, 21088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const unsigned char *ref, int ref_stride, 21188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org unsigned int *sse) { 21288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 21388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 16, 8, 21488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get8x8var_sse2, 8); 21588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((unsigned int)sum * sum) >> 7); 2166fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org} 2173f0af3b06425f635f3559f0bd4f53efea95fa5e2johannkoenig@chromium.org 21888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride, 21988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const unsigned char *ref, int ref_stride, 22088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org unsigned int *sse) { 
22188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 22288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 8, 16, 22388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get8x8var_sse2, 8); 22488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((unsigned int)sum * sum) >> 7); 2256fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org} 2266fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 22788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride, 22888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const unsigned char *ref, int ref_stride, 22988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org unsigned int *sse) { 23088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 23188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); 232d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org return *sse - (((unsigned int)sum * sum) >> 8); 23310a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 23410a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 23588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride, 23688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 23710a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 23888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 23988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 32, 32, 24088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get16x16var_sse2, 16); 
24188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((int64_t)sum * sum) >> 10); 24210a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 24310a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 24488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride, 24588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 24610a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 24788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 24888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 32, 16, 24988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get16x16var_sse2, 16); 25088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((int64_t)sum * sum) >> 9); 25110a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 25210a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 25388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride, 25488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 25510a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 25688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 25788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 16, 32, 25888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get16x16var_sse2, 16); 25988b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((int64_t)sum * sum) >> 9); 26010a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 26110a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 
26288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride, 26388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 26410a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 26588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 26688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 64, 64, 26788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get16x16var_sse2, 16); 26888b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((int64_t)sum * sum) >> 12); 26910a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 27010a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 27188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride, 27288b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 27310a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 27488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 27588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 64, 32, 27688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get16x16var_sse2, 16); 27788b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((int64_t)sum * sum) >> 11); 27810a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org} 27910a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org 28088b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.orgunsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride, 28188b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org const uint8_t *ref, int ref_stride, 
28210a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org unsigned int *sse) { 28388b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org int sum; 28488b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org variance_sse2(src, src_stride, ref, ref_stride, 32, 64, 28588b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org sse, &sum, vp9_get16x16var_sse2, 16); 28688b47b29cc274dd19cddc37c1ce1834d97df282efgalligan@chromium.org return *sse - (((int64_t)sum * sum) >> 11); 2876fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org} 2886fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 289d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgunsigned int vp9_mse8x8_sse2(const uint8_t *src, int src_stride, 290d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const uint8_t *ref, int ref_stride, 291d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned int *sse) { 292d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vp9_variance8x8_sse2(src, src_stride, ref, ref_stride, sse); 293d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org return *sse; 294d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org} 295d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 296d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgunsigned int vp9_mse8x16_sse2(const uint8_t *src, int src_stride, 297d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org const uint8_t *ref, int ref_stride, 298d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org unsigned int *sse) { 299d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vp9_variance8x16_sse2(src, src_stride, ref, ref_stride, sse); 300d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org return *sse; 301d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org} 302d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org 
// 16x8 sum of squared differences.  Reuses the variance routine for its *sse
// side effect; the variance it returns is intentionally discarded.
unsigned int vp9_mse16x8_sse2(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              unsigned int *sse) {
  vp9_variance16x8_sse2(src, src_stride, ref, ref_stride, sse);
  return *sse;
}

// 16x16 sum of squared differences; see vp9_mse16x8_sse2.
unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse) {
  vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
  return *sse;
}

// Prototypes for the column kernels vp9_sub_pixel_variance{4,8,16}xh_<opt>,
// which are defined outside this file.  As used by FN below, each returns
// the sum of differences for a w-wide, height-tall column and writes the sum
// of squared differences to *sse.
#define DECL(w, opt) \
int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \
                                        ptrdiff_t src_stride, \
                                        int x_offset, int y_offset, \
                                        const uint8_t *dst, \
                                        ptrdiff_t dst_stride, \
                                        int height, unsigned int *sse)
#define DECLS(opt1, opt2) \
DECL(4, opt2); \
DECL(8, opt1); \
DECL(16, opt1)

DECLS(sse2, sse);
DECLS(ssse3, ssse3);
#undef DECLS
#undef DECL

// Defines vp9_sub_pixel_variance<w>x<h>_<opt>() on top of the wf-wide column
// kernel.
//   w, h         - block width and height in pixels
//   wf           - width of the column kernel; w must be wf, 2 * wf or 4 * wf
//   wlog2, hlog2 - log2 of w and h; sum^2 is divided by w * h via this shift
//   opt          - suffix of the kernel and of the generated function
//   cast         - parenthesized type used to widen se before squaring
// Columns are addressed at multiples of wf rather than at fixed offsets of
// 16, 32 and 48.  This is identical for every wf == 16 instantiation and stays
// correct should a narrower kernel ever be tiled.
#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
                                                     int src_stride, \
                                                     int x_offset, \
                                                     int y_offset, \
                                                     const uint8_t *dst, \
                                                     int dst_stride, \
                                                     unsigned int *sse_ptr) { \
  unsigned int sse; \
  int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \
                                                y_offset, dst, dst_stride, \
                                                h, &sse); \
  if ((w) > (wf)) { \
    unsigned int sse2; \
    int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + (wf), src_stride, \
                                                   x_offset, y_offset, \
                                                   dst + (wf), dst_stride, \
                                                   h, &sse2); \
    se += se2; \
    sse += sse2; \
    if ((w) > (wf) * 2) { \
      se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 2 * (wf), src_stride, \
                                                 x_offset, y_offset, \
                                                 dst + 2 * (wf), dst_stride, \
                                                 h, &sse2); \
      se += se2; \
      sse += sse2; \
      se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 3 * (wf), src_stride, \
                                                 x_offset, y_offset, \
                                                 dst + 3 * (wf), dst_stride, \
                                                 h, &sse2); \
      se += se2; \
      sse += sse2; \
    } \
  } \
  *sse_ptr = sse; \
  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}

// Instantiates FN for every block size.  opt1 selects the kernel used for the
// 8- and 16-wide columns, opt2 the kernel used for the 4-wide columns.  Blocks
// of 16x32 / 32x16 and larger widen se to int64_t before squaring.
#define FNS(opt1, opt2) \
FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \
FN(16,  8, 16, 4, 3, opt1, (unsigned int)); \
FN(8,  16,  8, 3, 4, opt1, (unsigned int)); \
FN(8,   8,  8, 3, 3, opt1, (unsigned int)); \
FN(8,   4,  8, 3, 2, opt1, (unsigned int)); \
FN(4,   8,  4, 2, 3, opt2, (unsigned int)); \
FN(4,   4,  4, 2, 2, opt2, (unsigned int))
387d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 388d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFNS(sse2, sse); 389d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFNS(ssse3, ssse3); 390d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 391d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#undef FNS 392d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#undef FN 393d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 394d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#define DECL(w, opt) \ 395d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgint vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ 396d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org ptrdiff_t src_stride, \ 397d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int x_offset, int y_offset, \ 398d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org const uint8_t *dst, \ 399d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org ptrdiff_t dst_stride, \ 400d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org const uint8_t *sec, \ 401d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org ptrdiff_t sec_stride, \ 402d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int height, unsigned int *sse) 403d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#define DECLS(opt1, opt2) \ 404d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgDECL(4, opt2); \ 405d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgDECL(8, opt1); \ 406d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgDECL(16, opt1) 407d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 408d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgDECLS(sse2, sse); 409d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgDECLS(ssse3, ssse3); 
410d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#undef DECL 411d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#undef DECLS 412d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 413d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 414d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgunsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ 415d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int src_stride, \ 416d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int x_offset, \ 417d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int y_offset, \ 418d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org const uint8_t *dst, \ 419d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int dst_stride, \ 420d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org unsigned int *sseptr, \ 421d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org const uint8_t *sec) { \ 422d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org unsigned int sse; \ 423d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ 424d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org y_offset, dst, dst_stride, \ 425d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org sec, w, h, &sse); \ 426d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org if (w > wf) { \ 427d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org unsigned int sse2; \ 428d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ 429d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org x_offset, y_offset, \ 430d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org dst + 16, dst_stride, \ 
431d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org sec + 16, w, h, &sse2); \ 432d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org se += se2; \ 433d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org sse += sse2; \ 434d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org if (w > wf * 2) { \ 435d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ 436d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org x_offset, y_offset, \ 437d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org dst + 32, dst_stride, \ 438d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org sec + 32, w, h, &sse2); \ 439d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org se += se2; \ 440d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org sse += sse2; \ 441d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ 442d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org x_offset, y_offset, \ 443d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org dst + 48, dst_stride, \ 444d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org sec + 48, w, h, &sse2); \ 445d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org se += se2; \ 446d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org sse += sse2; \ 447d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org } \ 448d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org } \ 449d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org *sseptr = sse; \ 450d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 4516fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org} 4526fefe538d859300e7febe78271828198c10f1b52fgalligan@chromium.org 
453d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#define FNS(opt1, opt2) \ 454d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 455d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 456d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 457d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 458d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 459d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 460d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 461ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgFN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 462ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgFN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 463ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgFN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 464ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgFN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 465ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgFN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 466ecee051929d6ced19cf324688774acccc9ad4a0ajohannkoenig@chromium.orgFN(4, 4, 4, 2, 2, opt2, (unsigned int)) 467d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 468d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFNS(sse2, sse); 469d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.orgFNS(ssse3, ssse3); 470d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org 471d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#undef FNS 472d0351deb5037171ecec154298d37e3a74d992b0dfgalligan@chromium.org#undef FN 473