/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
1176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#include "./vpx_config.h"
1293a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org
1393a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org#include "vpx_mem/vpx_mem.h"
1493a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org
1576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#include "vp9/common/vp9_reconinter.h"
1693a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org
1776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#include "vp9/decoder/vp9_dthread.h"
1893a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org#include "vp9/decoder/vp9_decoder.h"
1976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
#if CONFIG_MULTITHREAD
// Acquires |mutex|. The lock is first polled with pthread_mutex_trylock() up
// to kMaxTryLocks times; only if every attempt fails does the caller fall
// back to a blocking pthread_mutex_lock().
static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
  const int kMaxTryLocks = 4000;
  int attempts_left = kMaxTryLocks;

  while (attempts_left-- > 0) {
    // pthread_mutex_trylock() returns 0 once the lock has been taken.
    if (pthread_mutex_trylock(mutex) == 0) return;
  }

  // Every polling attempt failed: wait for the lock.
  pthread_mutex_lock(mutex);
}
#endif  // CONFIG_MULTITHREAD
3776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
3876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
3976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#if CONFIG_MULTITHREAD
4076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  const int nsync = lf_sync->sync_range;
4176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
4276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  if (r && !(c & (nsync - 1))) {
43693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com    pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
44693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com    mutex_lock(mutex);
4576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
4676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
47693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com      pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
4876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    }
49693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com    pthread_mutex_unlock(mutex);
5076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
5176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#else
5276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  (void)lf_sync;
5376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  (void)r;
5476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  (void)c;
5576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#endif  // CONFIG_MULTITHREAD
5676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org}
5776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
5876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
5976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                              const int sb_cols) {
6076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#if CONFIG_MULTITHREAD
6176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  const int nsync = lf_sync->sync_range;
6276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  int cur;
6376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  // Only signal when there are enough filtered SB for next row to run.
6476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  int sig = 1;
6576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
6676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  if (c < sb_cols - 1) {
6776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    cur = c;
6876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    if (c % nsync)
6976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      sig = 0;
7076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  } else {
7176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    cur = sb_cols + nsync;
7276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
7376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
7476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  if (sig) {
7576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    mutex_lock(&lf_sync->mutex_[r]);
7676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
7776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    lf_sync->cur_sb_col[r] = cur;
7876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
7976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    pthread_cond_signal(&lf_sync->cond_[r]);
8076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    pthread_mutex_unlock(&lf_sync->mutex_[r]);
8176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
8276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#else
8376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  (void)lf_sync;
8476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  (void)r;
8576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  (void)c;
8676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  (void)sb_cols;
8776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#endif  // CONFIG_MULTITHREAD
8876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org}
8976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
9076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Implement row loopfiltering for each thread.
9176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.orgstatic void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
92118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org                                VP9_COMMON *const cm,
93118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org                                struct macroblockd_plane planes[MAX_MB_PLANE],
9476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                                int start, int stop, int y_only,
9576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                                VP9LfSync *const lf_sync, int num_lf_workers) {
9676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
9776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  int r, c;  // SB row and col
9876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
9976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
10076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  for (r = start; r < stop; r += num_lf_workers) {
10176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    const int mi_row = r << MI_BLOCK_SIZE_LOG2;
10287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride;
10376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
10476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    for (c = 0; c < sb_cols; ++c) {
10576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      const int mi_col = c << MI_BLOCK_SIZE_LOG2;
106693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com      LOOP_FILTER_MASK lfm;
10776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      int plane;
10876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
10976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      sync_read(lf_sync, r, c);
11076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
111118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org      vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
112693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com      vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
11376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
11476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      for (plane = 0; plane < num_planes; ++plane) {
115118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org        vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
11676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      }
11776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
11876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      sync_write(lf_sync, r, c, sb_cols);
11976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    }
12076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
12176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org}
12276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
12376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Row-based multi-threaded loopfilter hook
12487997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgstatic int loop_filter_row_worker(TileWorkerData *const tile_data,
12587997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org                                  void *unused) {
12676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  LFWorkerData *const lf_data = &tile_data->lfdata;
12787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org  (void)unused;
128118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org  loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
12976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                      lf_data->start, lf_data->stop, lf_data->y_only,
13076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                      lf_data->lf_sync, lf_data->num_lf_workers);
13176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  return 1;
13276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org}
13376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
13476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// VP9 decoder: Implement multi-threaded loopfilter that uses the tile
13576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// threads.
1367765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.orgvoid vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
1377765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org                              VP9Decoder *pbi, VP9_COMMON *cm,
13876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                              int frame_filter_level,
1397765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org                              int y_only) {
140693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com  VP9LfSync *const lf_sync = &pbi->lf_row_sync;
141e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org  const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
14276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  // Number of superblock rows and cols
14376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
14493a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  const int tile_cols = 1 << cm->log2_tile_cols;
1457765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org  const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
14676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  int i;
14776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
14887997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org  if (!frame_filter_level) return;
14987997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org
150d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  if (!lf_sync->sync_range || cm->last_height != cm->height) {
151d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    vp9_loop_filter_dealloc(lf_sync);
15287997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.org    vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width);
15376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
15476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
15576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  vp9_loop_filter_frame_init(cm, frame_filter_level);
15676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
15776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  // Initialize cur_sb_col to -1 for all SB rows.
158693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com  vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
15976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
16076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  // Set up loopfilter thread data.
16193a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // The decoder is using num_workers instead of pbi->num_tile_workers
16293a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // because it has been observed that using more threads on the
16393a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // loopfilter, than there are tile columns in the frame will hurt
16493a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // performance on Android. This is because the system will only
16593a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // schedule the tile decode workers on cores equal to the number
16693a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // of tile columns. Then if the decoder tries to use more threads for the
16793a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // loopfilter, it will hurt performance because of contention. If the
16893a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // multithreading code changes in the future then the number of workers
16993a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  // used by the loopfilter should be revisited.
17093a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  for (i = 0; i < num_workers; ++i) {
17176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    VP9Worker *const worker = &pbi->tile_workers[i];
17276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
17376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    LFWorkerData *const lf_data = &tile_data->lfdata;
17476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
17576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    worker->hook = (VP9WorkerHook)loop_filter_row_worker;
17676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
17776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    // Loopfilter data
1787765c078fa920ba6c949c15f16b6cc979d8bb95bjohannkoenig@chromium.org    lf_data->frame_buffer = frame;
17976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    lf_data->cm = cm;
180118f379ec73bf762ee63784bc5f41ffd41107470johannkoenig@chromium.org    vp9_copy(lf_data->planes, pbi->mb.plane);
18176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    lf_data->start = i;
18276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    lf_data->stop = sb_rows;
18376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    lf_data->y_only = y_only;   // always do all planes in decoder
18476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
185693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com    lf_data->lf_sync = lf_sync;
18693a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org    lf_data->num_lf_workers = num_workers;
18776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
18876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    // Start loopfiltering
18993a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org    if (i == num_workers - 1) {
190e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      winterface->execute(worker);
19176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    } else {
192e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org      winterface->launch(worker);
19376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    }
19476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
19576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
19676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  // Wait till all rows are finished
19793a74791c8e808ea76001ee07693aa2a5fdd3500johannkoenig@chromium.org  for (i = 0; i < num_workers; ++i) {
198e2064011d36b2008099446503f28e64d445060ecjohannkoenig@chromium.org    winterface->sync(&pbi->tile_workers[i]);
19976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
20076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org}
20176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
// Returns the sync range (nsync) to use for a frame of the given width.
//
// nsync numbers are picked by testing. For example, for 4k video, using 4
// gives best performance.
//   width <  640          -> 1
//   640  <= width <= 1280 -> 2
//   1280 <  width <= 4096 -> 4
//   width >  4096         -> 8
static int get_sync_range(int width) {
  if (width > 4096) return 8;
  if (width > 1280) return 4;
  if (width >= 640) return 2;
  return 1;
}
21576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
21676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Allocate memory for lf row synchronization
21787997d490ae52aa962a985c95b3cddf7f8832641johannkoenig@chromium.orgvoid vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
21876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                           int width) {
219d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  lf_sync->rows = rows;
22076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#if CONFIG_MULTITHREAD
221d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  {
222d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    int i;
22376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
224d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    CHECK_MEM_ERROR(cm, lf_sync->mutex_,
225d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org                    vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
226d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    for (i = 0; i < rows; ++i) {
227d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org      pthread_mutex_init(&lf_sync->mutex_[i], NULL);
228d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    }
22976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
230d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    CHECK_MEM_ERROR(cm, lf_sync->cond_,
231d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org                    vpx_malloc(sizeof(*lf_sync->cond_) * rows));
232d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    for (i = 0; i < rows; ++i) {
233d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org      pthread_cond_init(&lf_sync->cond_[i], NULL);
234d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    }
23576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
23676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org#endif  // CONFIG_MULTITHREAD
23776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
23876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
23976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org                  vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
24076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
24176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  // Set up nsync.
24276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  lf_sync->sync_range = get_sync_range(width);
24376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org}
24476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
24576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org// Deallocate lf synchronization related mutex and data
246d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.orgvoid vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
24776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  if (lf_sync != NULL) {
248693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com#if CONFIG_MULTITHREAD
24976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    int i;
25076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org
25176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    if (lf_sync->mutex_ != NULL) {
252d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org      for (i = 0; i < lf_sync->rows; ++i) {
25376e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org        pthread_mutex_destroy(&lf_sync->mutex_[i]);
25476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      }
25576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      vpx_free(lf_sync->mutex_);
25676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    }
25776e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    if (lf_sync->cond_ != NULL) {
258d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org      for (i = 0; i < lf_sync->rows; ++i) {
25976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org        pthread_cond_destroy(&lf_sync->cond_[i]);
26076e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      }
26176e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org      vpx_free(lf_sync->cond_);
26276e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    }
263693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com#endif  // CONFIG_MULTITHREAD
26476e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    vpx_free(lf_sync->cur_sb_col);
26576e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    // clear the structure as the source of this call may be a resize in which
26676e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org    // case this call will be followed by an _alloc() which may fail.
267693441efe611de7ca09c00f4e79776f604b689f4joeyparrish@google.com    vp9_zero(*lf_sync);
26876e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org  }
26976e516e2154f353aa02c504bac88afb0f95fefa7johannkoenig@chromium.org}
270