1/* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "./vpx_config.h" 12 13#include "vpx_mem/vpx_mem.h" 14 15#include "vp9/common/vp9_reconinter.h" 16 17#include "vp9/decoder/vp9_dthread.h" 18#include "vp9/decoder/vp9_decoder.h" 19 20#if CONFIG_MULTITHREAD 21static INLINE void mutex_lock(pthread_mutex_t *const mutex) { 22 const int kMaxTryLocks = 4000; 23 int locked = 0; 24 int i; 25 26 for (i = 0; i < kMaxTryLocks; ++i) { 27 if (!pthread_mutex_trylock(mutex)) { 28 locked = 1; 29 break; 30 } 31 } 32 33 if (!locked) 34 pthread_mutex_lock(mutex); 35} 36#endif // CONFIG_MULTITHREAD 37 38static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { 39#if CONFIG_MULTITHREAD 40 const int nsync = lf_sync->sync_range; 41 42 if (r && !(c & (nsync - 1))) { 43 mutex_lock(&lf_sync->mutex_[r - 1]); 44 45 while (c > lf_sync->cur_sb_col[r - 1] - nsync) { 46 pthread_cond_wait(&lf_sync->cond_[r - 1], 47 &lf_sync->mutex_[r - 1]); 48 } 49 pthread_mutex_unlock(&lf_sync->mutex_[r - 1]); 50 } 51#else 52 (void)lf_sync; 53 (void)r; 54 (void)c; 55#endif // CONFIG_MULTITHREAD 56} 57 58static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, 59 const int sb_cols) { 60#if CONFIG_MULTITHREAD 61 const int nsync = lf_sync->sync_range; 62 int cur; 63 // Only signal when there are enough filtered SB for next row to run. 64 int sig = 1; 65 66 if (c < sb_cols - 1) { 67 cur = c; 68 if (c % nsync) 69 sig = 0; 70 } else { 71 cur = sb_cols + nsync; 72 } 73 74 if (sig) { 75 mutex_lock(&lf_sync->mutex_[r]); 76 77 lf_sync->cur_sb_col[r] = cur; 78 79 pthread_cond_signal(&lf_sync->cond_[r]); 80 pthread_mutex_unlock(&lf_sync->mutex_[r]); 81 } 82#else 83 (void)lf_sync; 84 (void)r; 85 (void)c; 86 (void)sb_cols; 87#endif // CONFIG_MULTITHREAD 88} 89 90// Implement row loopfiltering for each thread. 91static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, 92 VP9_COMMON *const cm, MACROBLOCKD *const xd, 93 int start, int stop, int y_only, 94 VP9LfSync *const lf_sync, int num_lf_workers) { 95 const int num_planes = y_only ? 1 : MAX_MB_PLANE; 96 int r, c; // SB row and col 97 LOOP_FILTER_MASK lfm; 98 const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; 99 100 for (r = start; r < stop; r += num_lf_workers) { 101 const int mi_row = r << MI_BLOCK_SIZE_LOG2; 102 MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; 103 104 for (c = 0; c < sb_cols; ++c) { 105 const int mi_col = c << MI_BLOCK_SIZE_LOG2; 106 int plane; 107 108 sync_read(lf_sync, r, c); 109 110 vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); 111 vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm); 112 113 for (plane = 0; plane < num_planes; ++plane) { 114 vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); 115 } 116 117 sync_write(lf_sync, r, c, sb_cols); 118 } 119 } 120} 121 122// Row-based multi-threaded loopfilter hook 123static int loop_filter_row_worker(void *arg1, void *arg2) { 124 (void)arg2; 125 TileWorkerData *const tile_data = (TileWorkerData*)arg1; 126 LFWorkerData *const lf_data = &tile_data->lfdata; 127 128 loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd, 129 lf_data->start, lf_data->stop, lf_data->y_only, 130 lf_data->lf_sync, lf_data->num_lf_workers); 131 return 1; 132} 133 134// VP9 decoder: Implement multi-threaded loopfilter that uses the tile 135// threads. 136void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, 137 VP9_COMMON *cm, 138 MACROBLOCKD *xd, 139 int frame_filter_level, 140 int y_only, int partial_frame) { 141 // Number of superblock rows and cols 142 const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; 143 int i; 144 (void)xd; 145 (void)partial_frame; 146 147 // Allocate memory used in thread synchronization. 148 // This always needs to be done even if frame_filter_level is 0. 149 if (!cm->current_video_frame || cm->last_height != cm->height) { 150 VP9LfSync *const lf_sync = &pbi->lf_row_sync; 151 152 if (cm->last_height != cm->height) { 153 const int aligned_last_height = 154 ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2); 155 const int last_sb_rows = 156 mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >> 157 MI_BLOCK_SIZE_LOG2; 158 159 vp9_loop_filter_dealloc(lf_sync, last_sb_rows); 160 } 161 162 vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width); 163 } 164 165 if (!frame_filter_level) return; 166 167 vp9_loop_filter_frame_init(cm, frame_filter_level); 168 169 // Initialize cur_sb_col to -1 for all SB rows. 170 vpx_memset(pbi->lf_row_sync.cur_sb_col, -1, 171 sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); 172 173 // Set up loopfilter thread data. 174 for (i = 0; i < pbi->num_tile_workers; ++i) { 175 VP9Worker *const worker = &pbi->tile_workers[i]; 176 TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; 177 LFWorkerData *const lf_data = &tile_data->lfdata; 178 179 worker->hook = (VP9WorkerHook)loop_filter_row_worker; 180 181 // Loopfilter data 182 lf_data->frame_buffer = get_frame_new_buffer(cm); 183 lf_data->cm = cm; 184 lf_data->xd = pbi->mb; 185 lf_data->start = i; 186 lf_data->stop = sb_rows; 187 lf_data->y_only = y_only; // always do all planes in decoder 188 189 lf_data->lf_sync = &pbi->lf_row_sync; 190 lf_data->num_lf_workers = pbi->num_tile_workers; 191 192 // Start loopfiltering 193 if (i == pbi->num_tile_workers - 1) { 194 vp9_worker_execute(worker); 195 } else { 196 vp9_worker_launch(worker); 197 } 198 } 199 200 // Wait till all rows are finished 201 for (i = 0; i < pbi->num_tile_workers; ++i) { 202 vp9_worker_sync(&pbi->tile_workers[i]); 203 } 204} 205 206// Set up nsync by width. 207static int get_sync_range(int width) { 208 // nsync numbers are picked by testing. For example, for 4k 209 // video, using 4 gives best performance. 210 if (width < 640) 211 return 1; 212 else if (width <= 1280) 213 return 2; 214 else if (width <= 4096) 215 return 4; 216 else 217 return 8; 218} 219 220// Allocate memory for lf row synchronization 221void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows, 222 int width) { 223#if CONFIG_MULTITHREAD 224 int i; 225 226 CHECK_MEM_ERROR(cm, lf_sync->mutex_, 227 vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); 228 for (i = 0; i < rows; ++i) { 229 pthread_mutex_init(&lf_sync->mutex_[i], NULL); 230 } 231 232 CHECK_MEM_ERROR(cm, lf_sync->cond_, 233 vpx_malloc(sizeof(*lf_sync->cond_) * rows)); 234 for (i = 0; i < rows; ++i) { 235 pthread_cond_init(&lf_sync->cond_[i], NULL); 236 } 237#endif // CONFIG_MULTITHREAD 238 239 CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, 240 vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); 241 242 // Set up nsync. 243 lf_sync->sync_range = get_sync_range(width); 244} 245 246// Deallocate lf synchronization related mutex and data 247void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) { 248#if CONFIG_MULTITHREAD 249 if (lf_sync != NULL) { 250 int i; 251 252 if (lf_sync->mutex_ != NULL) { 253 for (i = 0; i < rows; ++i) { 254 pthread_mutex_destroy(&lf_sync->mutex_[i]); 255 } 256 vpx_free(lf_sync->mutex_); 257 } 258 if (lf_sync->cond_ != NULL) { 259 for (i = 0; i < rows; ++i) { 260 pthread_cond_destroy(&lf_sync->cond_[i]); 261 } 262 vpx_free(lf_sync->cond_); 263 } 264 265 vpx_free(lf_sync->cur_sb_col); 266 // clear the structure as the source of this call may be a resize in which 267 // case this call will be followed by an _alloc() which may fail. 268 vpx_memset(lf_sync, 0, sizeof(*lf_sync)); 269 } 270#else 271 (void)rows; 272 if (lf_sync != NULL) { 273 vpx_free(lf_sync->cur_sb_col); 274 vpx_memset(lf_sync, 0, sizeof(*lf_sync)); 275 } 276#endif // CONFIG_MULTITHREAD 277} 278