176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org/* 276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org * 476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org * Use of this source code is governed by a BSD-style license 576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org * that can be found in the LICENSE file in the root of the source 676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org * tree. An additional intellectual property rights grant can be found 776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org * in the file PATENTS. All contributing project authors may 876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org * be found in the AUTHORS file in the root of the source tree. 976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org */ 1076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 1176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#include "./vpx_config.h" 1293a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org 1393a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org#include "vpx_mem/vpx_mem.h" 1493a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org 1576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#include "vp9/common/vp9_reconinter.h" 1693a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org 1776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#include "vp9/decoder/vp9_dthread.h" 1893a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org#include "vp9/decoder/vp9_decoder.h" 1976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 2076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#if CONFIG_MULTITHREAD 2176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic INLINE void mutex_lock(pthread_mutex_t *const mutex) { 2276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int kMaxTryLocks = 4000; 2376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int locked = 0; 2476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int i; 2576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 2676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org for (i = 0; i < kMaxTryLocks; ++i) { 2776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (!pthread_mutex_trylock(mutex)) { 2876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org locked = 1; 2976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org break; 3076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 3176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 3276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 3376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (!locked) 3476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org pthread_mutex_lock(mutex); 3576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 3676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#endif // CONFIG_MULTITHREAD 3776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 3876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { 3976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#if CONFIG_MULTITHREAD 4076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int nsync = lf_sync->sync_range; 4176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 4276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (r && !(c & (nsync - 1))) { 43693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; 44693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com mutex_lock(mutex); 4576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 4676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org while (c > lf_sync->cur_sb_col[r - 1] - nsync) { 47693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); 4876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 49693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com pthread_mutex_unlock(mutex); 5076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 5176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#else 5276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org (void)lf_sync; 5376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org (void)r; 5476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org (void)c; 5576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#endif // CONFIG_MULTITHREAD 5676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 5776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 5876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, 5976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int sb_cols) { 6076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#if CONFIG_MULTITHREAD 6176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int nsync = lf_sync->sync_range; 6276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int cur; 6376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Only signal when there are enough filtered SB for next row to run. 6476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int sig = 1; 6576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 6676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (c < sb_cols - 1) { 6776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org cur = c; 6876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (c % nsync) 6976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org sig = 0; 7076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } else { 7176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org cur = sb_cols + nsync; 7276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 7376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 7476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (sig) { 7576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org mutex_lock(&lf_sync->mutex_[r]); 7676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 7776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_sync->cur_sb_col[r] = cur; 7876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 7976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org pthread_cond_signal(&lf_sync->cond_[r]); 8076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org pthread_mutex_unlock(&lf_sync->mutex_[r]); 8176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 8276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#else 8376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org (void)lf_sync; 8476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org (void)r; 8576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org (void)c; 8676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org (void)sb_cols; 8776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#endif // CONFIG_MULTITHREAD 8876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 8976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 9076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Implement row loopfiltering for each thread. 9176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, 92118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org VP9_COMMON *const cm, 93118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org struct macroblockd_plane planes[MAX_MB_PLANE], 9476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int start, int stop, int y_only, 9576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org VP9LfSync *const lf_sync, int num_lf_workers) { 9676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int num_planes = y_only ? 1 : MAX_MB_PLANE; 9776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int r, c; // SB row and col 9876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; 9976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 10076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org for (r = start; r < stop; r += num_lf_workers) { 10176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int mi_row = r << MI_BLOCK_SIZE_LOG2; 10287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride; 10376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 10476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org for (c = 0; c < sb_cols; ++c) { 10576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int mi_col = c << MI_BLOCK_SIZE_LOG2; 106693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com LOOP_FILTER_MASK lfm; 10776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int plane; 10876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 10976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org sync_read(lf_sync, r, c); 11076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 111118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); 112693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); 11376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 11476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org for (plane = 0; plane < num_planes; ++plane) { 115118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); 11676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 11776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 11876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org sync_write(lf_sync, r, c, sb_cols); 11976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 12076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 12176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 12276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 12376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Row-based multi-threaded loopfilter hook 12487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgstatic int loop_filter_row_worker(TileWorkerData *const tile_data, 12587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org void *unused) { 12676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org LFWorkerData *const lf_data = &tile_data->lfdata; 12787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org (void)unused; 128118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes, 12976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_data->start, lf_data->stop, lf_data->y_only, 13076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_data->lf_sync, lf_data->num_lf_workers); 13176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org return 1; 13276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 13376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 13476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// VP9 decoder: Implement multi-threaded loopfilter that uses the tile 13576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// threads. 1367765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.orgvoid vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, 1377765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org VP9Decoder *pbi, VP9_COMMON *cm, 13876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int frame_filter_level, 1397765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org int y_only) { 140693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com VP9LfSync *const lf_sync = &pbi->lf_row_sync; 141e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); 14276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Number of superblock rows and cols 14376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; 14493a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org const int tile_cols = 1 << cm->log2_tile_cols; 1457765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); 14676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int i; 14776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 14887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org if (!frame_filter_level) return; 14987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org 150d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org if (!lf_sync->sync_range || cm->last_height != cm->height) { 151d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vp9_loop_filter_dealloc(lf_sync); 15287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width); 15376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 15476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 15576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org vp9_loop_filter_frame_init(cm, frame_filter_level); 15676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 15776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Initialize cur_sb_col to -1 for all SB rows. 158693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); 15976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 16076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Set up loopfilter thread data. 16193a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // The decoder is using num_workers instead of pbi->num_tile_workers 16293a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // because it has been observed that using more threads on the 16393a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // loopfilter, than there are tile columns in the frame will hurt 16493a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // performance on Android. This is because the system will only 16593a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // schedule the tile decode workers on cores equal to the number 16693a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // of tile columns. Then if the decoder tries to use more threads for the 16793a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // loopfilter, it will hurt performance because of contention. If the 16893a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // multithreading code changes in the future then the number of workers 16993a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org // used by the loopfilter should be revisited. 17093a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org for (i = 0; i < num_workers; ++i) { 17176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org VP9Worker *const worker = &pbi->tile_workers[i]; 17276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; 17376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org LFWorkerData *const lf_data = &tile_data->lfdata; 17476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 17576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org worker->hook = (VP9WorkerHook)loop_filter_row_worker; 17676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 17776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Loopfilter data 1787765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org lf_data->frame_buffer = frame; 17976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_data->cm = cm; 180118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org vp9_copy(lf_data->planes, pbi->mb.plane); 18176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_data->start = i; 18276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_data->stop = sb_rows; 18376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_data->y_only = y_only; // always do all planes in decoder 18476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 185693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com lf_data->lf_sync = lf_sync; 18693a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org lf_data->num_lf_workers = num_workers; 18776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 18876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Start loopfiltering 18993a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org if (i == num_workers - 1) { 190e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org winterface->execute(worker); 19176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } else { 192e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org winterface->launch(worker); 19376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 19476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 19576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 19676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Wait till all rows are finished 19793a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org for (i = 0; i < num_workers; ++i) { 198e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org winterface->sync(&pbi->tile_workers[i]); 19976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 20076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 20176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 20276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Set up nsync by width. 20376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic int get_sync_range(int width) { 20476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // nsync numbers are picked by testing. For example, for 4k 20576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // video, using 4 gives best performance. 20676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (width < 640) 20776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org return 1; 20876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org else if (width <= 1280) 20976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org return 2; 21076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org else if (width <= 4096) 21176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org return 4; 21276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org else 21376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org return 8; 21476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 21576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 21676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Allocate memory for lf row synchronization 21787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgvoid vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, 21876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int width) { 219d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org lf_sync->rows = rows; 22076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#if CONFIG_MULTITHREAD 221d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org { 222d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org int i; 22376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 224d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org CHECK_MEM_ERROR(cm, lf_sync->mutex_, 225d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); 226d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < rows; ++i) { 227d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org pthread_mutex_init(&lf_sync->mutex_[i], NULL); 228d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 22976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 230d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org CHECK_MEM_ERROR(cm, lf_sync->cond_, 231d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org vpx_malloc(sizeof(*lf_sync->cond_) * rows)); 232d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < rows; ++i) { 233d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org pthread_cond_init(&lf_sync->cond_[i], NULL); 234d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org } 23576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 23676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#endif // CONFIG_MULTITHREAD 23776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 23876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, 23976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); 24076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 24176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // Set up nsync. 24276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org lf_sync->sync_range = get_sync_range(width); 24376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 24476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 24576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Deallocate lf synchronization related mutex and data 246d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgvoid vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { 24776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (lf_sync != NULL) { 248693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com#if CONFIG_MULTITHREAD 24976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org int i; 25076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org 25176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (lf_sync->mutex_ != NULL) { 252d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < lf_sync->rows; ++i) { 25376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org pthread_mutex_destroy(&lf_sync->mutex_[i]); 25476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 25576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org vpx_free(lf_sync->mutex_); 25676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 25776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org if (lf_sync->cond_ != NULL) { 258d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org for (i = 0; i < lf_sync->rows; ++i) { 25976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org pthread_cond_destroy(&lf_sync->cond_[i]); 26076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 26176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org vpx_free(lf_sync->cond_); 26276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 263693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com#endif // CONFIG_MULTITHREAD 26476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org vpx_free(lf_sync->cur_sb_col); 26576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // clear the structure as the source of this call may be a resize in which 26676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org // case this call will be followed by an _alloc() which may fail. 267693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com vp9_zero(*lf_sync); 26876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org } 26976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org} 270