1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vpx_config.h" 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/encoder/vp9_variance.h" 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_pragmas.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan 16233d2500723e5594f3e7c70896ffeeef32b9c950ywantypedef void (*get_var_avx2) ( 17233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan 25233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_get16x16var_avx2 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 27233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan 35233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_get32x32var_avx2 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *SSE, 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan int *Sum 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan 45233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance32xh_avx2 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *src, 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan int height, 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan 57233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32xh_avx2 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *src, 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *sec, 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sec_stride, 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan int height, 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sseptr 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan); 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan 71233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void variance_avx2(const unsigned char *src_ptr, int source_stride, 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, int recon_stride, 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, int h, unsigned int *sse, int *sum, 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan get_var_avx2 var_fn, int block_size) { 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0; 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0; 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan int i, j; 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = 0; 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sum = 0; 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (i = 0; i < h; i += 16) { 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (j = 0; j < w; j += block_size) { 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 16 rows horizontally each call 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan var_fn(src_ptr + source_stride * i + j, source_stride, 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse += sse0; 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sum += sum0; 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan 93233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance16x16_avx2 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan( 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get16x16var_avx2, 16); 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((unsigned int)avg * avg) >> 8)); 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan 109233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_mse16x16_avx2( 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *src_ptr, 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan const unsigned char *ref_ptr, 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse0; 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan int sum0; 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_get16x16var_avx2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan &sum0); 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = sse0; 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse0; 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan 123233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 elements vertically in parallel 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get32x32var_avx2, 32); 134233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 10)); 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan 138233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 elements vertically in parallel 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get32x32var_avx2, 32); 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 9)); 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan 154233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, 155233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 156233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 158233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 159233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 161233d2500723e5594f3e7c70896ffeeef32b9c950ywan 162233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 elements vertically in parallel 163233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, 164233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get32x32var_avx2, 32); 165233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 166233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 12)); 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan 169233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan int source_stride, 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *ref_ptr, 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan int recon_stride, 173233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse) { 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int var; 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan int avg; 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 elements vertically in parallel 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan variance_avx2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, 179233d2500723e5594f3e7c70896ffeeef32b9c950ywan &var, &avg, vp9_get32x32var_avx2, 32); 180233d2500723e5594f3e7c70896ffeeef32b9c950ywan 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse = var; 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan return (var - (((int64_t)avg * avg) >> 11)); 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 184233d2500723e5594f3e7c70896ffeeef32b9c950ywan 185233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src, 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, 189233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse_ptr) { 192233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 elements in parallel 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse; 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset, 195233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_offset, dst, dst_stride, 196233d2500723e5594f3e7c70896ffeeef32b9c950ywan 64, &sse); 197233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing the next 32 elements in parallel 198233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse2; 199233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se2 = vp9_sub_pixel_variance32xh_avx2(src + 32, src_stride, 200233d2500723e5594f3e7c70896ffeeef32b9c950ywan x_offset, y_offset, 201233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst + 32, dst_stride, 202233d2500723e5594f3e7c70896ffeeef32b9c950ywan 64, &sse2); 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse_ptr = sse; 206233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse - (((int64_t)se * se) >> 12); 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 208233d2500723e5594f3e7c70896ffeeef32b9c950ywan 209233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src, 210233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 211233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, 212233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, 213233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, 215233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sse_ptr) { 216233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 element in parallel 217233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse; 218233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset, 219233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_offset, dst, dst_stride, 220233d2500723e5594f3e7c70896ffeeef32b9c950ywan 32, &sse); 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sse_ptr = sse; 222233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse - (((int64_t)se * se) >> 10); 223233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 224233d2500723e5594f3e7c70896ffeeef32b9c950ywan 225233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src, 226233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 227233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, 228233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, 229233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, 230233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, 231233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sseptr, 232233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *sec) { 233233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 elements in parallel 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse; 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan 236233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset, 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_offset, dst, dst_stride, 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan sec, 64, 64, &sse); 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse2; 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing the next 32 elements in parallel 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se2 = vp9_sub_pixel_avg_variance32xh_avx2(src + 32, src_stride, x_offset, 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_offset, dst + 32, dst_stride, 243233d2500723e5594f3e7c70896ffeeef32b9c950ywan sec + 32, 64, 64, &sse2); 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan se += se2; 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan sse += sse2; 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sseptr = sse; 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan 248233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse - (((int64_t)se * se) >> 12); 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 250233d2500723e5594f3e7c70896ffeeef32b9c950ywan 251233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src, 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x_offset, 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan int y_offset, 255233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *dst, 256233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride, 257233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int *sseptr, 258233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *sec) { 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan // processing 32 element in parallel 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int sse; 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset, 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan y_offset, dst, dst_stride, 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan sec, 32, 32, &sse); 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan *sseptr = sse; 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan return sse - (((int64_t)se * se) >> 10); 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan 269