12ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian/* 22ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 32ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian * 42ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 52ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 62ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 72ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 82ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 92ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian */ 102ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 112ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#include "./vpx_config.h" 122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#include "vpx_mem/vpx_mem.h" 142ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 152ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#include "vp9/common/vp9_reconinter.h" 162ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 172ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#include "vp9/decoder/vp9_dthread.h" 182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#include "vp9/decoder/vp9_decoder.h" 192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#if CONFIG_MULTITHREAD 212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic INLINE void mutex_lock(pthread_mutex_t *const mutex) { 222ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int kMaxTryLocks = 4000; 232ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int locked = 0; 242ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int i; 252ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 262ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < kMaxTryLocks; ++i) { 272ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (!pthread_mutex_trylock(mutex)) { 282ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian locked = 1; 292ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian break; 302ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 312ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 322ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 332ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (!locked) 342ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian pthread_mutex_lock(mutex); 352ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 362ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif // CONFIG_MULTITHREAD 372ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 382ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { 392ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#if CONFIG_MULTITHREAD 402ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int nsync = lf_sync->sync_range; 412ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 422ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (r && !(c & (nsync - 1))) { 43ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; 44ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian mutex_lock(mutex); 452ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 462ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian while (c > lf_sync->cur_sb_col[r - 1] - nsync) { 47ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); 482ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 49ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian pthread_mutex_unlock(mutex); 502ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 512ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#else 522ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian (void)lf_sync; 532ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian (void)r; 542ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian (void)c; 552ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif // CONFIG_MULTITHREAD 562ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 572ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 582ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, 592ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int sb_cols) { 602ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#if CONFIG_MULTITHREAD 612ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int nsync = lf_sync->sync_range; 622ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int cur; 632ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Only signal when there are enough filtered SB for next row to run. 642ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int sig = 1; 652ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 662ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (c < sb_cols - 1) { 672ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian cur = c; 682ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (c % nsync) 692ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian sig = 0; 702ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } else { 712ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian cur = sb_cols + nsync; 722ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 732ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 742ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (sig) { 752ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian mutex_lock(&lf_sync->mutex_[r]); 762ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 772ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_sync->cur_sb_col[r] = cur; 782ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 792ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian pthread_cond_signal(&lf_sync->cond_[r]); 802ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian pthread_mutex_unlock(&lf_sync->mutex_[r]); 812ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 822ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#else 832ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian (void)lf_sync; 842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian (void)r; 852ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian (void)c; 862ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian (void)sb_cols; 872ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif // CONFIG_MULTITHREAD 882ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 892ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 902ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// Implement row loopfiltering for each thread. 912ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, 92ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian VP9_COMMON *const cm, 93ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian struct macroblockd_plane planes[MAX_MB_PLANE], 942ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int start, int stop, int y_only, 952ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian VP9LfSync *const lf_sync, int num_lf_workers) { 962ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int num_planes = y_only ? 1 : MAX_MB_PLANE; 972ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int r, c; // SB row and col 982ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; 992ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1002ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (r = start; r < stop; r += num_lf_workers) { 1012ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int mi_row = r << MI_BLOCK_SIZE_LOG2; 102ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; 1032ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1042ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (c = 0; c < sb_cols; ++c) { 1052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int mi_col = c << MI_BLOCK_SIZE_LOG2; 106ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian LOOP_FILTER_MASK lfm; 1072ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int plane; 1082ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1092ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian sync_read(lf_sync, r, c); 1102ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 111ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); 112ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); 1132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1142ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (plane = 0; plane < num_planes; ++plane) { 115ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); 1162ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 1172ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian sync_write(lf_sync, r, c, sb_cols); 1192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 1202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 1212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 1222ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1232ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// Row-based multi-threaded loopfilter hook 1242ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic int loop_filter_row_worker(void *arg1, void *arg2) { 1252ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian TileWorkerData *const tile_data = (TileWorkerData*)arg1; 1262ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian LFWorkerData *const lf_data = &tile_data->lfdata; 127ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian (void) arg2; 128ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes, 1292ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_data->start, lf_data->stop, lf_data->y_only, 1302ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_data->lf_sync, lf_data->num_lf_workers); 1312ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian return 1; 1322ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 1332ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1342ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// VP9 decoder: Implement multi-threaded loopfilter that uses the tile 1352ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// threads. 136ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, 137ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian VP9Decoder *pbi, VP9_COMMON *cm, 1382ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int frame_filter_level, 139ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int y_only) { 140ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian VP9LfSync *const lf_sync = &pbi->lf_row_sync; 141ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); 1422ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Number of superblock rows and cols 1432ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; 1444fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang const int tile_cols = 1 << cm->log2_tile_cols; 145ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); 1462ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int i; 1472ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1482ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Allocate memory used in thread synchronization. 1492ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // This always needs to be done even if frame_filter_level is 0. 1502ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (!cm->current_video_frame || cm->last_height != cm->height) { 1512ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (cm->last_height != cm->height) { 1522ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int aligned_last_height = 1532ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2); 1542ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const int last_sb_rows = 1552ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >> 1562ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian MI_BLOCK_SIZE_LOG2; 1572ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1582ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vp9_loop_filter_dealloc(lf_sync, last_sb_rows); 1592ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 1602ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1612ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width); 1622ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 1632ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1642ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (!frame_filter_level) return; 1652ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1662ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vp9_loop_filter_frame_init(cm, frame_filter_level); 1672ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1682ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Initialize cur_sb_col to -1 for all SB rows. 169ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); 1702ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1712ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Set up loopfilter thread data. 1724fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // The decoder is using num_workers instead of pbi->num_tile_workers 1734fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // because it has been observed that using more threads on the 1744fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // loopfilter, than there are tile columns in the frame will hurt 1754fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // performance on Android. This is because the system will only 1764fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // schedule the tile decode workers on cores equal to the number 1774fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // of tile columns. Then if the decoder tries to use more threads for the 1784fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // loopfilter, it will hurt performance because of contention. If the 1794fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // multithreading code changes in the future then the number of workers 1804fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang // used by the loopfilter should be revisited. 1814fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang for (i = 0; i < num_workers; ++i) { 1822ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian VP9Worker *const worker = &pbi->tile_workers[i]; 1832ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; 1842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian LFWorkerData *const lf_data = &tile_data->lfdata; 1852ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1862ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian worker->hook = (VP9WorkerHook)loop_filter_row_worker; 1872ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1882ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Loopfilter data 189ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian lf_data->frame_buffer = frame; 1902ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_data->cm = cm; 191ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vp9_copy(lf_data->planes, pbi->mb.plane); 1922ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_data->start = i; 1932ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_data->stop = sb_rows; 1942ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_data->y_only = y_only; // always do all planes in decoder 1952ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 196ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian lf_data->lf_sync = lf_sync; 1974fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang lf_data->num_lf_workers = num_workers; 1982ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 1992ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Start loopfiltering 2004fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang if (i == num_workers - 1) { 201ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian winterface->execute(worker); 2022ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } else { 203ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian winterface->launch(worker); 2042ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2072ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Wait till all rows are finished 2084fb68e5dd4e93c7599dc905d861de11ac39c5585hkuang for (i = 0; i < num_workers; ++i) { 209ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian winterface->sync(&pbi->tile_workers[i]); 2102ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2112ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 2122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// Set up nsync by width. 2142ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic int get_sync_range(int width) { 2152ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // nsync numbers are picked by testing. For example, for 4k 2162ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // video, using 4 gives best performance. 2172ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (width < 640) 2182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian return 1; 2192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian else if (width <= 1280) 2202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian return 2; 2212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian else if (width <= 4096) 2222ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian return 4; 2232ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian else 2242ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian return 8; 2252ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 2262ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2272ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// Allocate memory for lf row synchronization 2282ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianvoid vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows, 2292ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int width) { 2302ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#if CONFIG_MULTITHREAD 2312ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int i; 2322ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2332ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian CHECK_MEM_ERROR(cm, lf_sync->mutex_, 2342ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); 2352ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < rows; ++i) { 2362ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian pthread_mutex_init(&lf_sync->mutex_[i], NULL); 2372ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2382ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2392ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian CHECK_MEM_ERROR(cm, lf_sync->cond_, 2402ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vpx_malloc(sizeof(*lf_sync->cond_) * rows)); 2412ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < rows; ++i) { 2422ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian pthread_cond_init(&lf_sync->cond_[i], NULL); 2432ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2442ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif // CONFIG_MULTITHREAD 2452ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2462ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, 2472ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); 2482ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2492ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Set up nsync. 2502ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian lf_sync->sync_range = get_sync_range(width); 2512ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 2522ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2532ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// Deallocate lf synchronization related mutex and data 2542ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianvoid vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) { 255ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#if !CONFIG_MULTITHREAD 256ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian (void)rows; 257ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif // !CONFIG_MULTITHREAD 258ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2592ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (lf_sync != NULL) { 260ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#if CONFIG_MULTITHREAD 2612ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int i; 2622ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2632ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (lf_sync->mutex_ != NULL) { 2642ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < rows; ++i) { 2652ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian pthread_mutex_destroy(&lf_sync->mutex_[i]); 2662ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2672ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vpx_free(lf_sync->mutex_); 2682ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2692ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (lf_sync->cond_ != NULL) { 2702ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < rows; ++i) { 2712ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian pthread_cond_destroy(&lf_sync->cond_[i]); 2722ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2732ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vpx_free(lf_sync->cond_); 2742ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 275ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif // CONFIG_MULTITHREAD 2762ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vpx_free(lf_sync->cur_sb_col); 2772ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // clear the structure as the source of this call may be a resize in which 2782ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // case this call will be followed by an _alloc() which may fail. 279ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vp9_zero(*lf_sync); 2802ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 2812ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 282