1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "vpx_config.h" 12#include "vp8/common/variance.h" 13#include "vpx_ports/mem.h" 14 15extern unsigned int vp8_get16x16var_sse2 16( 17 const unsigned char *src_ptr, 18 int source_stride, 19 const unsigned char *ref_ptr, 20 int recon_stride, 21 unsigned int *SSE, 22 int *Sum 23); 24extern void vp8_half_horiz_vert_variance16x_h_sse2 25( 26 const unsigned char *ref_ptr, 27 int ref_pixels_per_line, 28 const unsigned char *src_ptr, 29 int src_pixels_per_line, 30 unsigned int Height, 31 int *sum, 32 unsigned int *sumsquared 33); 34extern void vp8_half_horiz_variance16x_h_sse2 35( 36 const unsigned char *ref_ptr, 37 int ref_pixels_per_line, 38 const unsigned char *src_ptr, 39 int src_pixels_per_line, 40 unsigned int Height, 41 int *sum, 42 unsigned int *sumsquared 43); 44extern void vp8_half_vert_variance16x_h_sse2 45( 46 const unsigned char *ref_ptr, 47 int ref_pixels_per_line, 48 const unsigned char *src_ptr, 49 int src_pixels_per_line, 50 unsigned int Height, 51 int *sum, 52 unsigned int *sumsquared 53); 54extern void vp8_filter_block2d_bil_var_ssse3 55( 56 const unsigned char *ref_ptr, 57 int ref_pixels_per_line, 58 const unsigned char *src_ptr, 59 int src_pixels_per_line, 60 unsigned int Height, 61 int xoffset, 62 int yoffset, 63 int *sum, 64 unsigned int *sumsquared 65); 66 67unsigned int vp8_sub_pixel_variance16x16_ssse3 68( 69 const unsigned char *src_ptr, 70 int src_pixels_per_line, 71 int xoffset, 72 int yoffset, 73 const unsigned char *dst_ptr, 74 int dst_pixels_per_line, 75 unsigned int *sse 76) 77{ 78 int xsum0; 79 unsigned int xxsum0; 80 81 /* note we could avoid these if statements if the calling function 82 * just called the appropriate functions inside. 83 */ 84 if (xoffset == 4 && yoffset == 0) 85 { 86 vp8_half_horiz_variance16x_h_sse2( 87 src_ptr, src_pixels_per_line, 88 dst_ptr, dst_pixels_per_line, 16, 89 &xsum0, &xxsum0); 90 } 91 else if (xoffset == 0 && yoffset == 4) 92 { 93 vp8_half_vert_variance16x_h_sse2( 94 src_ptr, src_pixels_per_line, 95 dst_ptr, dst_pixels_per_line, 16, 96 &xsum0, &xxsum0); 97 } 98 else if (xoffset == 4 && yoffset == 4) 99 { 100 vp8_half_horiz_vert_variance16x_h_sse2( 101 src_ptr, src_pixels_per_line, 102 dst_ptr, dst_pixels_per_line, 16, 103 &xsum0, &xxsum0); 104 } 105 else 106 { 107 vp8_filter_block2d_bil_var_ssse3( 108 src_ptr, src_pixels_per_line, 109 dst_ptr, dst_pixels_per_line, 16, 110 xoffset, yoffset, 111 &xsum0, &xxsum0); 112 } 113 114 *sse = xxsum0; 115 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 116} 117 118unsigned int vp8_sub_pixel_variance16x8_ssse3 119( 120 const unsigned char *src_ptr, 121 int src_pixels_per_line, 122 int xoffset, 123 int yoffset, 124 const unsigned char *dst_ptr, 125 int dst_pixels_per_line, 126 unsigned int *sse 127 128) 129{ 130 int xsum0; 131 unsigned int xxsum0; 132 133 if (xoffset == 4 && yoffset == 0) 134 { 135 vp8_half_horiz_variance16x_h_sse2( 136 src_ptr, src_pixels_per_line, 137 dst_ptr, dst_pixels_per_line, 8, 138 &xsum0, &xxsum0); 139 } 140 else if (xoffset == 0 && yoffset == 4) 141 { 142 vp8_half_vert_variance16x_h_sse2( 143 src_ptr, src_pixels_per_line, 144 dst_ptr, dst_pixels_per_line, 8, 145 &xsum0, &xxsum0); 146 } 147 else if (xoffset == 4 && yoffset == 4) 148 { 149 vp8_half_horiz_vert_variance16x_h_sse2( 150 src_ptr, src_pixels_per_line, 151 dst_ptr, dst_pixels_per_line, 8, 152 &xsum0, &xxsum0); 153 } 154 else 155 { 156 vp8_filter_block2d_bil_var_ssse3( 157 src_ptr, src_pixels_per_line, 158 dst_ptr, dst_pixels_per_line, 8, 159 xoffset, yoffset, 160 &xsum0, &xxsum0); 161 } 162 163 *sse = xxsum0; 164 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 165} 166