1/* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <assert.h> 12 13#include "./vpx_config.h" 14#include "./vp9_rtcd.h" 15#include "vp9/common/vp9_common.h" 16#include "vp9/common/vp9_convolve.h" 17#include "vp9/common/vp9_filter.h" 18#include "vpx/vpx_integer.h" 19#include "vpx_ports/mem.h" 20 21static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, 22 uint8_t *dst, ptrdiff_t dst_stride, 23 const InterpKernel *x_filters, 24 int x0_q4, int x_step_q4, int w, int h) { 25 int x, y; 26 src -= SUBPEL_TAPS / 2 - 1; 27 for (y = 0; y < h; ++y) { 28 int x_q4 = x0_q4; 29 for (x = 0; x < w; ++x) { 30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; 31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; 32 int k, sum = 0; 33 for (k = 0; k < SUBPEL_TAPS; ++k) 34 sum += src_x[k] * x_filter[k]; 35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); 36 x_q4 += x_step_q4; 37 } 38 src += src_stride; 39 dst += dst_stride; 40 } 41} 42 43static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, 44 uint8_t *dst, ptrdiff_t dst_stride, 45 const InterpKernel *x_filters, 46 int x0_q4, int x_step_q4, int w, int h) { 47 int x, y; 48 src -= SUBPEL_TAPS / 2 - 1; 49 for (y = 0; y < h; ++y) { 50 int x_q4 = x0_q4; 51 for (x = 0; x < w; ++x) { 52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; 53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; 54 int k, sum = 0; 55 for (k = 0; k < SUBPEL_TAPS; ++k) 56 sum += src_x[k] * x_filter[k]; 57 dst[x] = ROUND_POWER_OF_TWO(dst[x] + 58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); 59 x_q4 += x_step_q4; 60 } 61 src += src_stride; 62 dst += dst_stride; 63 } 64} 65 66static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, 67 uint8_t *dst, ptrdiff_t dst_stride, 68 const InterpKernel *y_filters, 69 int y0_q4, int y_step_q4, int w, int h) { 70 int x, y; 71 src -= src_stride * (SUBPEL_TAPS / 2 - 1); 72 73 for (x = 0; x < w; ++x) { 74 int y_q4 = y0_q4; 75 for (y = 0; y < h; ++y) { 76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; 77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; 78 int k, sum = 0; 79 for (k = 0; k < SUBPEL_TAPS; ++k) 80 sum += src_y[k * src_stride] * y_filter[k]; 81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); 82 y_q4 += y_step_q4; 83 } 84 ++src; 85 ++dst; 86 } 87} 88 89static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, 90 uint8_t *dst, ptrdiff_t dst_stride, 91 const InterpKernel *y_filters, 92 int y0_q4, int y_step_q4, int w, int h) { 93 int x, y; 94 src -= src_stride * (SUBPEL_TAPS / 2 - 1); 95 96 for (x = 0; x < w; ++x) { 97 int y_q4 = y0_q4; 98 for (y = 0; y < h; ++y) { 99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; 100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; 101 int k, sum = 0; 102 for (k = 0; k < SUBPEL_TAPS; ++k) 103 sum += src_y[k * src_stride] * y_filter[k]; 104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + 105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); 106 y_q4 += y_step_q4; 107 } 108 ++src; 109 ++dst; 110 } 111} 112 113static void convolve(const uint8_t *src, ptrdiff_t src_stride, 114 uint8_t *dst, ptrdiff_t dst_stride, 115 const InterpKernel *const x_filters, 116 int x0_q4, int x_step_q4, 117 const InterpKernel *const y_filters, 118 int y0_q4, int y_step_q4, 119 int w, int h) { 120 // Fixed size intermediate buffer places limits on parameters. 121 // Maximum intermediate_height is 324, for y_step_q4 == 80, 122 // h == 64, taps == 8. 123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc 124 uint8_t temp[64 * 324]; 125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; 126 127 assert(w <= 64); 128 assert(h <= 64); 129 assert(y_step_q4 <= 80); 130 assert(x_step_q4 <= 80); 131 132 if (intermediate_height < h) 133 intermediate_height = h; 134 135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, 136 x_filters, x0_q4, x_step_q4, w, intermediate_height); 137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, 138 y_filters, y0_q4, y_step_q4, w, h); 139} 140 141static const InterpKernel *get_filter_base(const int16_t *filter) { 142 // NOTE: This assumes that the filter table is 256-byte aligned. 143 // TODO(agrange) Modify to make independent of table alignment. 144 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); 145} 146 147static int get_filter_offset(const int16_t *f, const InterpKernel *base) { 148 return (int)((const InterpKernel *)(intptr_t)f - base); 149} 150 151void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 152 uint8_t *dst, ptrdiff_t dst_stride, 153 const int16_t *filter_x, int x_step_q4, 154 const int16_t *filter_y, int y_step_q4, 155 int w, int h) { 156 const InterpKernel *const filters_x = get_filter_base(filter_x); 157 const int x0_q4 = get_filter_offset(filter_x, filters_x); 158 159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x, 160 x0_q4, x_step_q4, w, h); 161} 162 163void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 164 uint8_t *dst, ptrdiff_t dst_stride, 165 const int16_t *filter_x, int x_step_q4, 166 const int16_t *filter_y, int y_step_q4, 167 int w, int h) { 168 const InterpKernel *const filters_x = get_filter_base(filter_x); 169 const int x0_q4 = get_filter_offset(filter_x, filters_x); 170 171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, 172 x0_q4, x_step_q4, w, h); 173} 174 175void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, 176 uint8_t *dst, ptrdiff_t dst_stride, 177 const int16_t *filter_x, int x_step_q4, 178 const int16_t *filter_y, int y_step_q4, 179 int w, int h) { 180 const InterpKernel *const filters_y = get_filter_base(filter_y); 181 const int y0_q4 = get_filter_offset(filter_y, filters_y); 182 convolve_vert(src, src_stride, dst, dst_stride, filters_y, 183 y0_q4, y_step_q4, w, h); 184} 185 186void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, 187 uint8_t *dst, ptrdiff_t dst_stride, 188 const int16_t *filter_x, int x_step_q4, 189 const int16_t *filter_y, int y_step_q4, 190 int w, int h) { 191 const InterpKernel *const filters_y = get_filter_base(filter_y); 192 const int y0_q4 = get_filter_offset(filter_y, filters_y); 193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, 194 y0_q4, y_step_q4, w, h); 195} 196 197void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, 198 uint8_t *dst, ptrdiff_t dst_stride, 199 const int16_t *filter_x, int x_step_q4, 200 const int16_t *filter_y, int y_step_q4, 201 int w, int h) { 202 const InterpKernel *const filters_x = get_filter_base(filter_x); 203 const int x0_q4 = get_filter_offset(filter_x, filters_x); 204 205 const InterpKernel *const filters_y = get_filter_base(filter_y); 206 const int y0_q4 = get_filter_offset(filter_y, filters_y); 207 208 convolve(src, src_stride, dst, dst_stride, 209 filters_x, x0_q4, x_step_q4, 210 filters_y, y0_q4, y_step_q4, w, h); 211} 212 213void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, 214 uint8_t *dst, ptrdiff_t dst_stride, 215 const int16_t *filter_x, int x_step_q4, 216 const int16_t *filter_y, int y_step_q4, 217 int w, int h) { 218 /* Fixed size intermediate buffer places limits on parameters. */ 219 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); 220 assert(w <= 64); 221 assert(h <= 64); 222 223 vp9_convolve8_c(src, src_stride, temp, 64, 224 filter_x, x_step_q4, filter_y, y_step_q4, w, h); 225 vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h); 226} 227 228void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, 229 uint8_t *dst, ptrdiff_t dst_stride, 230 const int16_t *filter_x, int filter_x_stride, 231 const int16_t *filter_y, int filter_y_stride, 232 int w, int h) { 233 int r; 234 235 for (r = h; r > 0; --r) { 236 vpx_memcpy(dst, src, w); 237 src += src_stride; 238 dst += dst_stride; 239 } 240} 241 242void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, 243 uint8_t *dst, ptrdiff_t dst_stride, 244 const int16_t *filter_x, int filter_x_stride, 245 const int16_t *filter_y, int filter_y_stride, 246 int w, int h) { 247 int x, y; 248 249 for (y = 0; y < h; ++y) { 250 for (x = 0; x < w; ++x) 251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); 252 253 src += src_stride; 254 dst += dst_stride; 255 } 256} 257