variance_mmx.c revision 7ce0a1d1337c01056ba24006efab21f00e179e04
1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "./vpx_dsp_rtcd.h" 12 13#include "vpx_ports/mem.h" 14 15DECLARE_ALIGNED(16, static const int16_t, bilinear_filters_mmx[8][8]) = { 16 { 128, 128, 128, 128, 0, 0, 0, 0 }, 17 { 112, 112, 112, 112, 16, 16, 16, 16 }, 18 { 96, 96, 96, 96, 32, 32, 32, 32 }, 19 { 80, 80, 80, 80, 48, 48, 48, 48 }, 20 { 64, 64, 64, 64, 64, 64, 64, 64 }, 21 { 48, 48, 48, 48, 80, 80, 80, 80 }, 22 { 32, 32, 32, 32, 96, 96, 96, 96 }, 23 { 16, 16, 16, 16, 112, 112, 112, 112 } 24}; 25 26extern void vpx_get4x4var_mmx(const uint8_t *a, int a_stride, 27 const uint8_t *b, int b_stride, 28 unsigned int *sse, int *sum); 29 30extern void vpx_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr, 31 int ref_pixels_per_line, 32 const unsigned char *src_ptr, 33 int src_pixels_per_line, 34 const int16_t *HFilter, 35 const int16_t *VFilter, 36 int *sum, 37 unsigned int *sumsquared); 38 39extern void vpx_filter_block2d_bil_var_mmx(const unsigned char *ref_ptr, 40 int ref_pixels_per_line, 41 const unsigned char *src_ptr, 42 int src_pixels_per_line, 43 unsigned int Height, 44 const int16_t *HFilter, 45 const int16_t *VFilter, 46 int *sum, 47 unsigned int *sumsquared); 48 49 50unsigned int vpx_variance4x4_mmx(const unsigned char *a, int a_stride, 51 const unsigned char *b, int b_stride, 52 unsigned int *sse) { 53 unsigned int var; 54 int avg; 55 56 vpx_get4x4var_mmx(a, a_stride, b, b_stride, &var, &avg); 57 *sse = var; 58 return (var - (((unsigned int)avg * avg) >> 4)); 59} 60 61unsigned int vpx_variance8x8_mmx(const unsigned char *a, int a_stride, 62 const unsigned char *b, int b_stride, 63 unsigned int *sse) { 64 unsigned int var; 65 int avg; 66 67 vpx_get8x8var_mmx(a, a_stride, b, b_stride, &var, &avg); 68 *sse = var; 69 70 return (var - (((unsigned int)avg * avg) >> 6)); 71} 72 73unsigned int vpx_mse16x16_mmx(const unsigned char *a, int a_stride, 74 const unsigned char *b, int b_stride, 75 unsigned int *sse) { 76 unsigned int sse0, sse1, sse2, sse3, var; 77 int sum0, sum1, sum2, sum3; 78 79 vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); 80 vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); 81 vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, 82 b + 8 * b_stride, b_stride, &sse2, &sum2); 83 vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, 84 b + 8 * b_stride + 8, b_stride, &sse3, &sum3); 85 86 var = sse0 + sse1 + sse2 + sse3; 87 *sse = var; 88 return var; 89} 90 91unsigned int vpx_variance16x16_mmx(const unsigned char *a, int a_stride, 92 const unsigned char *b, int b_stride, 93 unsigned int *sse) { 94 unsigned int sse0, sse1, sse2, sse3, var; 95 int sum0, sum1, sum2, sum3, avg; 96 97 vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); 98 vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); 99 vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, 100 b + 8 * b_stride, b_stride, &sse2, &sum2); 101 vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, 102 b + 8 * b_stride + 8, b_stride, &sse3, &sum3); 103 104 var = sse0 + sse1 + sse2 + sse3; 105 avg = sum0 + sum1 + sum2 + sum3; 106 *sse = var; 107 return (var - (((unsigned int)avg * avg) >> 8)); 108} 109 110unsigned int vpx_variance16x8_mmx(const unsigned char *a, int a_stride, 111 const unsigned char *b, int b_stride, 112 unsigned int *sse) { 113 unsigned int sse0, sse1, var; 114 int sum0, sum1, avg; 115 116 vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); 117 vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); 118 119 var = sse0 + sse1; 120 avg = sum0 + sum1; 121 *sse = var; 122 return (var - (((unsigned int)avg * avg) >> 7)); 123} 124 125unsigned int vpx_variance8x16_mmx(const unsigned char *a, int a_stride, 126 const unsigned char *b, int b_stride, 127 unsigned int *sse) { 128 unsigned int sse0, sse1, var; 129 int sum0, sum1, avg; 130 131 vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); 132 vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, 133 b + 8 * b_stride, b_stride, &sse1, &sum1); 134 135 var = sse0 + sse1; 136 avg = sum0 + sum1; 137 *sse = var; 138 139 return (var - (((unsigned int)avg * avg) >> 7)); 140} 141 142uint32_t vpx_sub_pixel_variance4x4_mmx(const uint8_t *a, int a_stride, 143 int xoffset, int yoffset, 144 const uint8_t *b, int b_stride, 145 uint32_t *sse) { 146 int xsum; 147 unsigned int xxsum; 148 vpx_filter_block2d_bil4x4_var_mmx(a, a_stride, b, b_stride, 149 bilinear_filters_mmx[xoffset], 150 bilinear_filters_mmx[yoffset], 151 &xsum, &xxsum); 152 *sse = xxsum; 153 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); 154} 155 156 157uint32_t vpx_sub_pixel_variance8x8_mmx(const uint8_t *a, int a_stride, 158 int xoffset, int yoffset, 159 const uint8_t *b, int b_stride, 160 uint32_t *sse) { 161 int xsum; 162 uint32_t xxsum; 163 vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 8, 164 bilinear_filters_mmx[xoffset], 165 bilinear_filters_mmx[yoffset], 166 &xsum, &xxsum); 167 *sse = xxsum; 168 return (xxsum - (((uint32_t)xsum * xsum) >> 6)); 169} 170 171uint32_t vpx_sub_pixel_variance16x16_mmx(const uint8_t *a, int a_stride, 172 int xoffset, int yoffset, 173 const uint8_t *b, int b_stride, 174 uint32_t *sse) { 175 int xsum0, xsum1; 176 unsigned int xxsum0, xxsum1; 177 178 vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 16, 179 bilinear_filters_mmx[xoffset], 180 bilinear_filters_mmx[yoffset], 181 &xsum0, &xxsum0); 182 183 vpx_filter_block2d_bil_var_mmx(a + 8, a_stride, b + 8, b_stride, 16, 184 bilinear_filters_mmx[xoffset], 185 bilinear_filters_mmx[yoffset], 186 &xsum1, &xxsum1); 187 188 xsum0 += xsum1; 189 xxsum0 += xxsum1; 190 191 *sse = xxsum0; 192 return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); 193} 194 195uint32_t vpx_sub_pixel_variance16x8_mmx(const uint8_t *a, int a_stride, 196 int xoffset, int yoffset, 197 const uint8_t *b, int b_stride, 198 uint32_t *sse) { 199 int xsum0, xsum1; 200 unsigned int xxsum0, xxsum1; 201 202 vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 8, 203 bilinear_filters_mmx[xoffset], 204 bilinear_filters_mmx[yoffset], 205 &xsum0, &xxsum0); 206 207 vpx_filter_block2d_bil_var_mmx(a + 8, a_stride, b + 8, b_stride, 8, 208 bilinear_filters_mmx[xoffset], 209 bilinear_filters_mmx[yoffset], 210 &xsum1, &xxsum1); 211 212 xsum0 += xsum1; 213 xxsum0 += xxsum1; 214 215 *sse = xxsum0; 216 return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 7)); 217} 218 219uint32_t vpx_sub_pixel_variance8x16_mmx(const uint8_t *a, int a_stride, 220 int xoffset, int yoffset, 221 const uint8_t *b, int b_stride, 222 uint32_t *sse) { 223 int xsum; 224 unsigned int xxsum; 225 vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 16, 226 bilinear_filters_mmx[xoffset], 227 bilinear_filters_mmx[yoffset], 228 &xsum, &xxsum); 229 *sse = xxsum; 230 return (xxsum - (((uint32_t)xsum * xsum) >> 7)); 231} 232 233uint32_t vpx_variance_halfpixvar16x16_h_mmx(const uint8_t *a, int a_stride, 234 const uint8_t *b, int b_stride, 235 uint32_t *sse) { 236 return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 4, 0, b, b_stride, sse); 237} 238 239uint32_t vpx_variance_halfpixvar16x16_v_mmx(const uint8_t *a, int a_stride, 240 const uint8_t *b, int b_stride, 241 uint32_t *sse) { 242 return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 0, 4, b, b_stride, sse); 243} 244 245uint32_t vpx_variance_halfpixvar16x16_hv_mmx(const uint8_t *a, int a_stride, 246 const uint8_t *b, int b_stride, 247 uint32_t *sse) { 248 return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 4, 4, b, b_stride, sse); 249} 250