/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_seg_common.h"

// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16 (low order byte first) we end up with
// a mask that looks like this:
//
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
  0xffffffffffffffff,  // TX_4X4
  0xffffffffffffffff,  // TX_8x8
  0x5555555555555555,  // TX_16x16
  0x1111111111111111,  // TX_32x32
};

// 64 bit masks for above transform size. Each 1 represents a position where
// we should apply a loop filter across the top border of an 8x8 block
// boundary.
//
// In the case of TX_32X32 (low order byte first) we end up with
// a mask that looks like this:
//
//    11111111
//    00000000
//    00000000
//    00000000
//    11111111
//    00000000
//    00000000
//    00000000
//
// A loopfilter should be applied to every fourth row of 8x8 blocks vertically.
static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
  0xffffffffffffffff,  // TX_4X4
  0xffffffffffffffff,  // TX_8x8
  0x00ff00ff00ff00ff,  // TX_16x16
  0x000000ff000000ff,  // TX_32x32
};

// 64 bit masks for prediction sizes (left). Each 1 represents a position
// where the left border of an 8x8 block lies on a prediction-block edge.
// These are aligned to the right-most appropriate bit, and then shifted into
// place.
//
// In the case of BLOCK_16X32 (low order byte first) we end up with
// a mask that looks like this:
//
//    10000000
//    10000000
//    10000000
//    10000000
//    00000000
//    00000000
//    00000000
//    00000000
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001,  // BLOCK_4X4,
  0x0000000000000001,  // BLOCK_4X8,
  0x0000000000000001,  // BLOCK_8X4,
  0x0000000000000001,  // BLOCK_8X8,
  0x0000000000000101,  // BLOCK_8X16,
  0x0000000000000001,  // BLOCK_16X8,
  0x0000000000000101,  // BLOCK_16X16,
  0x0000000001010101,  // BLOCK_16X32,
  0x0000000000000101,  // BLOCK_32X16,
  0x0000000001010101,  // BLOCK_32X32,
  0x0101010101010101,  // BLOCK_32X64,
  0x0000000001010101,  // BLOCK_64X32,
  0x0101010101010101,  // BLOCK_64X64
};

// 64 bit mask to shift and set for each prediction size.
static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001,  // BLOCK_4X4
  0x0000000000000001,  // BLOCK_4X8
  0x0000000000000001,  // BLOCK_8X4
  0x0000000000000001,  // BLOCK_8X8
  0x0000000000000001,  // BLOCK_8X16,
  0x0000000000000003,  // BLOCK_16X8
  0x0000000000000003,  // BLOCK_16X16
  0x0000000000000003,  // BLOCK_16X32,
  0x000000000000000f,  // BLOCK_32X16,
  0x000000000000000f,  // BLOCK_32X32,
  0x000000000000000f,  // BLOCK_32X64,
  0x00000000000000ff,  // BLOCK_64X32,
  0x00000000000000ff,  // BLOCK_64X64
};

// 64 bit mask to shift and set for each prediction size. A bit is set for
// each 8x8 block that would be covered by a block of the given size placed
// at the top-left of the 64x64 block.
static const uint64_t size_mask[BLOCK_SIZES] = {
  0x0000000000000001,  // BLOCK_4X4
  0x0000000000000001,  // BLOCK_4X8
  0x0000000000000001,  // BLOCK_8X4
  0x0000000000000001,  // BLOCK_8X8
  0x0000000000000101,  // BLOCK_8X16,
  0x0000000000000003,  // BLOCK_16X8
  0x0000000000000303,  // BLOCK_16X16
  0x0000000003030303,  // BLOCK_16X32,
  0x0000000000000f0f,  // BLOCK_32X16,
  0x000000000f0f0f0f,  // BLOCK_32X32,
  0x0f0f0f0f0f0f0f0f,  // BLOCK_32X64,
  0x00000000ffffffff,  // BLOCK_64X32,
  0xffffffffffffffff,  // BLOCK_64X64
};

// These are used for masking the left and above 32x32 block borders.
static const uint64_t left_border = 0x1111111111111111;
static const uint64_t above_border = 0x000000ff000000ff;

// 16 bit masks for uv transform sizes.
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x5555,  // TX_16x16
  0x1111,  // TX_32x32
};

static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x0f0f,  // TX_16x16
  0x000f,  // TX_32x32
};

// 16 bit left mask to shift and set for each uv prediction size.
static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4,
  0x0001,  // BLOCK_4X8,
  0x0001,  // BLOCK_8X4,
  0x0001,  // BLOCK_8X8,
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8,
  0x0001,  // BLOCK_16X16,
  0x0011,  // BLOCK_16X32,
  0x0001,  // BLOCK_32X16,
  0x0011,  // BLOCK_32X32,
  0x1111,  // BLOCK_32X64
  0x0011,  // BLOCK_64X32,
  0x1111,  // BLOCK_64X64
};

// 16 bit above mask to shift and set for each uv prediction size.
static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0001,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0003,  // BLOCK_32X32,
  0x0003,  // BLOCK_32X64,
  0x000f,  // BLOCK_64X32,
  0x000f,  // BLOCK_64X64
};

// 16 bit mask to shift and set for each uv prediction size.
static const uint16_t size_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0011,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0033,  // BLOCK_32X32,
  0x3333,  // BLOCK_32X64,
  0x00ff,  // BLOCK_64X32,
  0xffff,  // BLOCK_64X64
};
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;

static const int mode_lf_lut[MB_MODE_COUNT] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
  1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
};

static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  int lvl;

  // For each possible value for the loop filter fill out limits
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
    // Set loop filter parameters that control sharpness.
    int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));

    if (sharpness_lvl > 0) {
      if (block_inside_limit > (9 - sharpness_lvl))
        block_inside_limit = (9 - sharpness_lvl);
    }

    if (block_inside_limit < 1)
      block_inside_limit = 1;

    vpx_memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
    vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
               SIMD_WIDTH);
  }
}

static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
                                const MB_MODE_INFO *mbmi) {
  return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]]
                   [mode_lf_lut[mbmi->mode]];
}

void vp9_loop_filter_init(VP9_COMMON *cm) {
  loop_filter_info_n *lfi = &cm->lf_info;
  struct loopfilter *lf = &cm->lf;
  int lvl;

  // init limits for given sharpness
  update_sharpness(lfi, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // init hev threshold const vectors
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
    vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}

void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
  int seg_id;
  // scale is the multiplier for lf_deltas:
  // 1 when default_filt_lvl is between 0 and 31;
  // 2 when default_filt_lvl is between 32 and 63.
  const int scale = 1 << (default_filt_lvl >> 5);
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;

  // update limits if sharpness has changed
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
    int lvl_seg = default_filt_lvl;
    if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
      const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
      lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
                      data : default_filt_lvl + data,
                      0, MAX_LOOP_FILTER);
    }

    if (!lf->mode_ref_delta_enabled) {
      // we could get rid of this if we assume that deltas are set to
      // zero when not in use; encoder always uses deltas
      vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
    } else {
      int ref, mode;
      const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
      lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

      for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
          const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale
                                        + lf->mode_deltas[mode] * scale;
          lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
        }
      }
    }
  }
}

static void filter_selectively_vert_row2(PLANE_TYPE plane_type,
                                         uint8_t *s, int pitch,
                                         unsigned int mask_16x16_l,
                                         unsigned int mask_8x8_l,
                                         unsigned int mask_4x4_l,
                                         unsigned int mask_4x4_int_l,
                                         const loop_filter_info_n *lfi_n,
                                         const uint8_t *lfl) {
  const int mask_shift = plane_type ? 4 : 8;
  const int mask_cutoff = plane_type ? 0xf : 0xff;
  const int lfl_forward = plane_type ? 4 : 8;

  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
  unsigned int mask;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

    // TODO(yunqingwang): count in loopfilter functions should be removed.
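    // Note (explanatory): mask_*_0 covers the current row of 8x8 blocks and
    // mask_*_1 the row directly below it (at s + 8 * pitch); when the same
    // bit is set in both, the *_dual variants filter both rows in one call.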
    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                   lfi0->hev_thr);
        } else if (mask_16x16_0 & 1) {
          vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
                              lfi0->hev_thr);
        } else {
          vp9_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
                              lfi1->lim, lfi1->hev_thr);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_8x8_0 & 1) {
          vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                             1);
        } else {
          vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr, 1);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_0 & 1) {
          vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                             1);
        } else {
          vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr, 1);
        }
      }

      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
          vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_int_0 & 1) {
          vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
                             lfi0->hev_thr, 1);
        } else {
          vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr, 1);
        }
      }
    }

    s += 8;
    lfl += 1;
    mask_16x16_0 >>= 1;
    mask_8x8_0 >>= 1;
    mask_4x4_0 >>= 1;
    mask_4x4_int_0 >>= 1;
    mask_16x16_1 >>= 1;
    mask_8x8_1 >>= 1;
    mask_4x4_1 >>= 1;
    mask_4x4_int_1 >>= 1;
  }
}

static void filter_selectively_horiz(uint8_t *s, int pitch,
                                     unsigned int mask_16x16,
                                     unsigned int mask_8x8,
                                     unsigned int mask_4x4,
                                     unsigned int mask_4x4_int,
                                     const loop_filter_info_n *lfi_n,
                                     const uint8_t *lfl) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
                                lfi->hev_thr, 2);
          count = 2;
        } else {
          vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
                                lfi->hev_thr, 1);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          if ((mask_4x4_int & 3) == 3) {
            vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr, 1);
            else if (mask_4x4_int & 2)
              vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr, 1);
          }
          count = 2;
        } else {
          vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
                               lfi->hev_thr, 1);

          if (mask_4x4_int & 1)
            vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr, 1);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
          if ((mask_4x4_int & 3) == 3) {
            vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr, 1);
            else if (mask_4x4_int & 2)
              vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr, 1);
          }
          count = 2;
        } else {
          vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);

          if (mask_4x4_int & 1)
            vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr, 1);
        }
      } else if (mask_4x4_int & 1) {
        vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr, 1);
      }
    }
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}

// This function ORs into the current lfm structure where to apply loop
// filters for the specific mi we are looking at. It uses information
// including the block size type (32x16, 32x32, etc.), the transform size,
// whether there were any coefficients encoded, and the loop filter strength
// of the block we are currently looking at. Shift is used to position the
// 1's we produce.
// TODO(JBB) Need another function for different resolution color.
static void build_masks(const loop_filter_info_n *const lfi_n,
                        const MODE_INFO *mi, const int shift_y,
                        const int shift_uv,
                        LOOP_FILTER_MASK *lfm) {
  const MB_MODE_INFO *mbmi = &mi->mbmi;
  const BLOCK_SIZE block_size = mbmi->sb_type;
  const TX_SIZE tx_size_y = mbmi->tx_size;
  const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
  const int filter_level = get_filter_level(lfi_n, mbmi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
  uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
  int i;

  // If filter level is 0 we don't loop filter.
  if (!filter_level) {
    return;
  } else {
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
    int index = shift_y;
    for (i = 0; i < h; i++) {
      vpx_memset(&lfm->lfl_y[index], filter_level, w);
      index += 8;
    }
  }

  // These set 1 in the current block size for the block size edges.
  // For instance, if the block size is 32x16, we'll set:
  //    above =   1111
  //              0000
  //    and
  //    left  =   1000
  //              1000
  // NOTE: In this example the low bit is the left-most one, so (1000) is
  // stored as 1, not 8...
  //
  // U and V set things on a 16 bit scale.
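  // Concretely (illustrative example): for BLOCK_32X16 with shift_y == 0 the
  // statements below OR above_prediction_mask[BLOCK_32X16] == 0x0f into
  // *above_y (the top row of four 8x8s) and
  // left_prediction_mask[BLOCK_32X16] == 0x0101 into *left_y (the left
  // column of two 8x8s).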
  //
  *above_y |= above_prediction_mask[block_size] << shift_y;
  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
  *left_y |= left_prediction_mask[block_size] << shift_y;
  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;

  // If the block has no coefficients and is not intra we skip applying
  // the loop filter on block edges.
  if (mbmi->skip && is_inter_block(mbmi))
    return;

  // Here we are adding a mask for the transform size. The transform
  // size mask is set to be correct for a 64x64 prediction block size. We
  // mask to match the size of the block we are working on and then shift it
  // into place.
  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;
  *above_uv |= (size_mask_uv[block_size] &
                above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;
  *left_uv |= (size_mask_uv[block_size] &
               left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  // Here we are trying to determine what to do with the internal 4x4 block
  // boundaries. These differ from the 4x4 boundaries on the outside edge of
  // an 8x8 in that the internal ones can be skipped and don't depend on
  // the prediction block size.
  if (tx_size_y == TX_4X4)
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;

  if (tx_size_uv == TX_4X4)
    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}

// This function does the same thing as the one above with the exception that
// it only affects the y masks. It exists because for blocks < 16x16 in size,
// we only update u and v masks on the first block.
static void build_y_mask(const loop_filter_info_n *const lfi_n,
                         const MODE_INFO *mi, const int shift_y,
                         LOOP_FILTER_MASK *lfm) {
  const MB_MODE_INFO *mbmi = &mi->mbmi;
  const BLOCK_SIZE block_size = mbmi->sb_type;
  const TX_SIZE tx_size_y = mbmi->tx_size;
  const int filter_level = get_filter_level(lfi_n, mbmi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  int i;

  if (!filter_level) {
    return;
  } else {
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
    int index = shift_y;
    for (i = 0; i < h; i++) {
      vpx_memset(&lfm->lfl_y[index], filter_level, w);
      index += 8;
    }
  }

  *above_y |= above_prediction_mask[block_size] << shift_y;
  *left_y |= left_prediction_mask[block_size] << shift_y;

  if (mbmi->skip && is_inter_block(mbmi))
    return;

  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;

  if (tx_size_y == TX_4X4)
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
}

// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
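// Bit layout note: in the 64-bit y masks, bit (row * 8 + col) corresponds to
// the 8x8 block at (row, col) within the 64x64 superblock; the 16-bit uv
// masks use bit (row * 4 + col) for each chroma 8x8 (luma 16x16) unit.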
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                    MODE_INFO **mi_8x8, const int mode_info_stride,
                    LOOP_FILTER_MASK *lfm) {
  int idx_32, idx_16, idx_8;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
  MODE_INFO **mip = mi_8x8;
  MODE_INFO **mip2 = mi_8x8;

  // These are offsets to the next mi in the 64x64 block. It is what gets
  // added to the mi ptr as we go through each loop. It helps us to avoid
  // setting up special row and column counters for each index. The last step
  // brings us back out to the starting position.
  const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4,
                           -(mode_info_stride << 2) - 4};
  const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2,
                           -(mode_info_stride << 1) - 2};
  const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1};

  // The following variables represent shifts to position the current block
  // mask over the appropriate block. A shift of 36 to the left will move
  // the bits for the final 32 by 32 block in the 64x64 down 4 rows and right
  // 4 columns to the appropriate spot.
  const int shift_32_y[] = {0, 4, 32, 36};
  const int shift_16_y[] = {0, 2, 16, 18};
  const int shift_8_y[] = {0, 1, 8, 9};
  const int shift_32_uv[] = {0, 2, 8, 10};
  const int shift_16_uv[] = {0, 1, 4, 5};
  int i;
  const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ?
                        cm->mi_rows - mi_row : MI_BLOCK_SIZE);
  const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
                        cm->mi_cols - mi_col : MI_BLOCK_SIZE);

  vp9_zero(*lfm);

  // TODO(jimbankoski): Try moving most of the following code into decode
  // loop and storing lfm in the mbmi structure so that we don't have to go
  // through the recursive loop structure multiple times.
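  // Worked example (illustrative): the 8x8 block at row 3, column 5 of the
  // 64x64 lies in the top-right 32x32 (shift_32_y[1] == 4), in the
  // bottom-left 16x16 of that (shift_16_y[2] == 16), and in its bottom-right
  // 8x8 (shift_8_y[3] == 9), so shift_y == 4 + 16 + 9 == 29 == 3 * 8 + 5.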
  switch (mip[0]->mbmi.sb_type) {
    case BLOCK_64X64:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      break;
    case BLOCK_64X32:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + mode_info_stride * 4;
      if (4 >= max_rows)
        break;
      build_masks(lfi_n, mip2[0], 32, 8, lfm);
      break;
    case BLOCK_32X64:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + 4;
      if (4 >= max_cols)
        break;
      build_masks(lfi_n, mip2[0], 4, 2, lfm);
      break;
    default:
      for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
        const int shift_y = shift_32_y[idx_32];
        const int shift_uv = shift_32_uv[idx_32];
        const int mi_32_col_offset = ((idx_32 & 1) << 2);
        const int mi_32_row_offset = ((idx_32 >> 1) << 2);
        if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
          continue;
        switch (mip[0]->mbmi.sb_type) {
          case BLOCK_32X32:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            break;
          case BLOCK_32X16:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            if (mi_32_row_offset + 2 >= max_rows)
              continue;
            mip2 = mip + mode_info_stride * 2;
            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
            break;
          case BLOCK_16X32:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            if (mi_32_col_offset + 2 >= max_cols)
              continue;
            mip2 = mip + 2;
            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
            break;
          default:
            for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
              const int mi_16_col_offset = mi_32_col_offset +
                  ((idx_16 & 1) << 1);
              const int mi_16_row_offset = mi_32_row_offset +
                  ((idx_16 >> 1) << 1);

              if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                continue;

              switch (mip[0]->mbmi.sb_type) {
                case BLOCK_16X16:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  break;
                case BLOCK_16X8:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_row_offset + 1 >= max_rows)
                    continue;
                  mip2 = mip + mode_info_stride;
                  build_y_mask(lfi_n, mip2[0], shift_y + 8, lfm);
                  break;
                case BLOCK_8X16:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_col_offset + 1 >= max_cols)
                    continue;
                  mip2 = mip + 1;
                  build_y_mask(lfi_n, mip2[0], shift_y + 1, lfm);
                  break;
                default: {
                  const int shift_y = shift_32_y[idx_32] +
                                      shift_16_y[idx_16] +
                                      shift_8_y[0];
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  mip += offset[0];
                  for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                    const int shift_y = shift_32_y[idx_32] +
                                        shift_16_y[idx_16] +
                                        shift_8_y[idx_8];
                    const int mi_8_col_offset = mi_16_col_offset +
                        ((idx_8 & 1));
                    const int mi_8_row_offset = mi_16_row_offset +
                        ((idx_8 >> 1));

                    if (mi_8_col_offset >= max_cols ||
                        mi_8_row_offset >= max_rows)
                      continue;
                    build_y_mask(lfi_n, mip[0], shift_y, lfm);
                  }
                  break;
                }
              }
            }
            break;
        }
      }
      break;
  }
  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
  // for 32x32 transforms also.
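  // (vp9_filter_block_plane() below only reads the TX_16X16, TX_8X8 and
  // TX_4X4 masks, so the TX_32X32 bits are folded into TX_16X16 here.)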
  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];

  // We do at least an 8 tap filter on every 32x32 boundary even if the
  // transform size is 4x4. So if the 4x4 is set on a border pixel add it to
  // the 8x8 and remove it from the 4x4.
  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
  lfm->left_y[TX_4X4] &= ~left_border;
  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
  lfm->above_y[TX_4X4] &= ~above_border;
  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
  lfm->left_uv[TX_4X4] &= ~left_border_uv;
  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
  lfm->above_uv[TX_4X4] &= ~above_border_uv;

  // We do some special edge handling.
  if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
    const uint64_t rows = cm->mi_rows - mi_row;

    // Each pixel inside the border gets a 1,
    const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
    const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);

    // Remove values completely outside our border.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv;

    // We don't apply a wide loop filter on the last uv block row. If set
    // apply the shorter one instead.
    if (rows == 1) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
      lfm->above_uv[TX_16X16] = 0;
    }
    if (rows == 5) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
    }
  }

  if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
    const uint64_t columns = cm->mi_cols - mi_col;

    // Each pixel inside the border gets a 1, the multiply copies the border
    // to where we need it.
    const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101;
    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;

    // Internal edges are not applied on the last column of the image so
    // we mask 1 more for the internal edges
    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;

    // Remove the bits outside the image edge.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv_int;

    // We don't apply a wide loop filter on the last uv column. If set
    // apply the shorter one instead.
    if (columns == 1) {
      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
      lfm->left_uv[TX_16X16] = 0;
    }
    if (columns == 5) {
      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
    }
  }
  // We don't apply a loop filter on the first column in the image. Mask that
  // out.
  if (mi_col == 0) {
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= 0xfefefefefefefefe;
      lfm->left_uv[i] &= 0xeeee;
    }
  }

  // Assert if we try to apply 2 different loop filters at the same position.
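  // After the adjustments above the masks are expected to be mutually
  // exclusive per edge: each 8x8 border position is claimed by at most one
  // filter length.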
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
}

static void filter_selectively_vert(uint8_t *s, int pitch,
                                    unsigned int mask_16x16,
                                    unsigned int mask_8x8,
                                    unsigned int mask_4x4,
                                    unsigned int mask_4x4_int,
                                    const loop_filter_info_n *lfi_n,
                                    const uint8_t *lfl) {
  unsigned int mask;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    if (mask & 1) {
      if (mask_16x16 & 1) {
        vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      } else if (mask_8x8 & 1) {
        vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
      } else if (mask_4x4 & 1) {
        vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
      }
    }
    if (mask_4x4_int & 1)
      vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
    s += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}

static void filter_block_plane_non420(VP9_COMMON *cm,
                                      struct macroblockd_plane *plane,
                                      MODE_INFO **mi_8x8,
                                      int mi_row, int mi_col) {
  const int ss_x = plane->subsampling_x;
  const int ss_y = plane->subsampling_y;
  const int row_step = 1 << ss_x;
  const int col_step = 1 << ss_y;
  const int row_step_stride = cm->mi_stride * row_step;
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  unsigned int mask_16x16[MI_BLOCK_SIZE] = {0};
  unsigned int mask_8x8[MI_BLOCK_SIZE] = {0};
  unsigned int mask_4x4[MI_BLOCK_SIZE] = {0};
  unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
  uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
  int r, c;

  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    unsigned int mask_16x16_c = 0;
    unsigned int mask_8x8_c = 0;
    unsigned int mask_4x4_c = 0;
    unsigned int border_mask;

    // Determine the vertical edges that need filtering
    for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
      const MODE_INFO *mi = mi_8x8[c];
      const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
      const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
      // left edge of current unit is block/partition edge -> no skip
      const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
          !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
      const int skip_this_c = skip_this && !block_edge_left;
      // top edge of current unit is block/partition edge -> no skip
      const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
          !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
      const int skip_this_r = skip_this && !block_edge_above;
      const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
                                  ? get_uv_tx_size(&mi[0].mbmi)
                                  : mi[0].mbmi.tx_size;
      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
      const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;

      // Filter level can vary per MI
      if (!(lfl[(r << 3) + (c >> ss_x)] =
            get_filter_level(&cm->lf_info, &mi[0].mbmi)))
        continue;

      // Build masks based on the transform size of each block
      if (tx_size == TX_32X32) {
        if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
          if (!skip_border_4x4_c)
            mask_16x16_c |= 1 << (c >> ss_x);
          else
            mask_8x8_c |= 1 << (c >> ss_x);
        }
        if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
          if (!skip_border_4x4_r)
            mask_16x16[r] |= 1 << (c >> ss_x);
          else
            mask_8x8[r] |= 1 << (c >> ss_x);
        }
      } else if (tx_size == TX_16X16) {
        if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
          if (!skip_border_4x4_c)
            mask_16x16_c |= 1 << (c >> ss_x);
          else
            mask_8x8_c |= 1 << (c >> ss_x);
        }
        if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
          if (!skip_border_4x4_r)
            mask_16x16[r] |= 1 << (c >> ss_x);
          else
            mask_8x8[r] |= 1 << (c >> ss_x);
        }
      } else {
        // force 8x8 filtering on 32x32 boundaries
        if (!skip_this_c) {
          if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
            mask_8x8_c |= 1 << (c >> ss_x);
          else
            mask_4x4_c |= 1 << (c >> ss_x);
        }

        if (!skip_this_r) {
          if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
            mask_8x8[r] |= 1 << (c >> ss_x);
          else
            mask_4x4[r] |= 1 << (c >> ss_x);
        }

        if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
          mask_4x4_int[r] |= 1 << (c >> ss_x);
      }
    }

    // Disable filtering on the leftmost column
    border_mask = ~(mi_col == 0);
    filter_selectively_vert(dst->buf, dst->stride,
                            mask_16x16_c & border_mask,
                            mask_8x8_c & border_mask,
                            mask_4x4_c & border_mask,
                            mask_4x4_int[r],
                            &cm->lf_info, &lfl[r << 3]);
    dst->buf += 8 * dst->stride;
    mi_8x8 += row_step_stride;
  }

  // Now do horizontal pass
  dst->buf = dst0;
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r = skip_border_4x4_r ?
        0 : mask_4x4_int[r];

    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16[r];
      mask_8x8_r = mask_8x8[r];
      mask_4x4_r = mask_4x4[r];
    }

    filter_selectively_horiz(dst->buf, dst->stride,
                             mask_16x16_r,
                             mask_8x8_r,
                             mask_4x4_r,
                             mask_4x4_int_r,
                             &cm->lf_info, &lfl[r << 3]);
    dst->buf += 8 * dst->stride;
  }
}

void vp9_filter_block_plane(VP9_COMMON *const cm,
                            struct macroblockd_plane *const plane,
                            int mi_row,
                            LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r, c;

  if (!plane->plane_type) {
    uint64_t mask_16x16 = lfm->left_y[TX_16X16];
    uint64_t mask_8x8 = lfm->left_y[TX_8X8];
    uint64_t mask_4x4 = lfm->left_y[TX_4X4];
    uint64_t mask_4x4_int = lfm->int_4x4_y;

    // Vertical pass: do 2 rows at one time
    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
      unsigned int mask_16x16_l = mask_16x16 & 0xffff;
      unsigned int mask_8x8_l = mask_8x8 & 0xffff;
      unsigned int mask_4x4_l = mask_4x4 & 0xffff;
      unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;

      // Disable filtering on the leftmost column
      filter_selectively_vert_row2(plane->plane_type,
                                   dst->buf, dst->stride,
                                   mask_16x16_l,
                                   mask_8x8_l,
                                   mask_4x4_l,
                                   mask_4x4_int_l,
                                   &cm->lf_info, &lfm->lfl_y[r << 3]);

      dst->buf += 16 * dst->stride;
      mask_16x16 >>= 16;
      mask_8x8 >>= 16;
      mask_4x4 >>= 16;
      mask_4x4_int >>= 16;
    }

    // Horizontal pass
    dst->buf = dst0;
    mask_16x16 = lfm->above_y[TX_16X16];
    mask_8x8 = lfm->above_y[TX_8X8];
    mask_4x4 = lfm->above_y[TX_4X4];
    mask_4x4_int = lfm->int_4x4_y;

    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
      unsigned int mask_16x16_r;
      unsigned int mask_8x8_r;
      unsigned int mask_4x4_r;

      if (mi_row + r == 0) {
        mask_16x16_r = 0;
        mask_8x8_r = 0;
        mask_4x4_r = 0;
      } else {
        mask_16x16_r = mask_16x16 & 0xff;
        mask_8x8_r = mask_8x8 & 0xff;
        mask_4x4_r = mask_4x4 & 0xff;
      }

      filter_selectively_horiz(dst->buf, dst->stride,
                               mask_16x16_r,
                               mask_8x8_r,
                               mask_4x4_r,
                               mask_4x4_int & 0xff,
                               &cm->lf_info, &lfm->lfl_y[r << 3]);

      dst->buf += 8 * dst->stride;
      mask_16x16 >>= 8;
      mask_8x8 >>= 8;
      mask_4x4 >>= 8;
      mask_4x4_int >>= 8;
    }
  } else {
    uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
    uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
    uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
    uint16_t mask_4x4_int = lfm->int_4x4_uv;

    // Vertical pass: do 2 rows at one time
    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
      if (plane->plane_type == 1) {
        for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
          lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
          lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) +
                                                       (c << 1)];
        }
      }

      {
        unsigned int mask_16x16_l = mask_16x16 & 0xff;
        unsigned int mask_8x8_l = mask_8x8 & 0xff;
        unsigned int mask_4x4_l = mask_4x4 & 0xff;
        unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;

        // Disable filtering on the leftmost column
        filter_selectively_vert_row2(plane->plane_type,
                                     dst->buf, dst->stride,
                                     mask_16x16_l,
                                     mask_8x8_l,
                                     mask_4x4_l,
                                     mask_4x4_int_l,
                                     &cm->lf_info, &lfm->lfl_uv[r << 1]);

        dst->buf += 16 * dst->stride;
        mask_16x16 >>= 8;
        mask_8x8 >>= 8;
        mask_4x4 >>= 8;
        mask_4x4_int >>= 8;
      }
    }

    // Horizontal pass
    dst->buf = dst0;
    mask_16x16 = lfm->above_uv[TX_16X16];
    mask_8x8 = lfm->above_uv[TX_8X8];
    mask_4x4 = lfm->above_uv[TX_4X4];
    mask_4x4_int = lfm->int_4x4_uv;

    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
      const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
      const unsigned int mask_4x4_int_r = skip_border_4x4_r ?
          0 : (mask_4x4_int & 0xf);
      unsigned int mask_16x16_r;
      unsigned int mask_8x8_r;
      unsigned int mask_4x4_r;

      if (mi_row + r == 0) {
        mask_16x16_r = 0;
        mask_8x8_r = 0;
        mask_4x4_r = 0;
      } else {
        mask_16x16_r = mask_16x16 & 0xf;
        mask_8x8_r = mask_8x8 & 0xf;
        mask_4x4_r = mask_4x4 & 0xf;
      }

      filter_selectively_horiz(dst->buf, dst->stride,
                               mask_16x16_r,
                               mask_8x8_r,
                               mask_4x4_r,
                               mask_4x4_int_r,
                               &cm->lf_info, &lfm->lfl_uv[r << 1]);

      dst->buf += 8 * dst->stride;
      mask_16x16 >>= 4;
      mask_8x8 >>= 4;
      mask_4x4 >>= 4;
      mask_4x4_int >>= 4;
    }
  }
}

void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
                          VP9_COMMON *cm, MACROBLOCKD *xd,
                          int start, int stop, int y_only) {
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  int mi_row, mi_col;
  LOOP_FILTER_MASK lfm;
  int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
                           xd->plane[1].subsampling_x == 1);

  for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
    MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride;

    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      int plane;

      vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);

      // TODO(JBB): Make setup_mask work for non 420.
      if (use_420)
        vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride,
                       &lfm);

      for (plane = 0; plane < num_planes; ++plane) {
        if (use_420)
          vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
        else
          filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
                                    mi_row, mi_col);
      }
    }
  }
}

void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
                           int frame_filter_level,
                           int y_only, int partial_frame) {
  int start_mi_row, end_mi_row, mi_rows_to_filter;
  if (!frame_filter_level) return;
  start_mi_row = 0;
  mi_rows_to_filter = cm->mi_rows;
  if (partial_frame && cm->mi_rows > 8) {
    start_mi_row = cm->mi_rows >> 1;
    start_mi_row &= 0xfffffff8;
    mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
  }
  end_mi_row = start_mi_row + mi_rows_to_filter;
  vp9_loop_filter_frame_init(cm, frame_filter_level);
  vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
                       start_mi_row, end_mi_row,
                       y_only);
}

int vp9_loop_filter_worker(void *arg1, void *arg2) {
  LFWorkerData *const lf_data = (LFWorkerData*)arg1;
  (void)arg2;
  vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
                       lf_data->start, lf_data->stop, lf_data->y_only);
  return 1;
}
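
// Typical usage (illustrative sketch, not part of this file): once a frame
// has been decoded or encoded, the caller applies the in-loop filter with
//
//   vp9_loop_filter_frame(cm, xd, cm->lf.filter_level, 0 /* y_only */,
//                         0 /* partial_frame */);
//
// which initializes the per-segment/per-reference levels via
// vp9_loop_filter_frame_init() and then filters every superblock row through
// vp9_loop_filter_rows(). vp9_loop_filter_worker() wraps the same row loop
// for use from a worker thread.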