1/* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "vp9/encoder/vp9_encodeframe.h" 12#include "vp9/encoder/vp9_encoder.h" 13#include "vp9/encoder/vp9_ethread.h" 14#include "vpx_dsp/vpx_dsp_common.h" 15 16static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { 17 int i, j, k, l, m, n; 18 19 for (i = 0; i < REFERENCE_MODES; i++) 20 td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i]; 21 22 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 23 td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i]; 24 25 for (i = 0; i < TX_SIZES; i++) 26 for (j = 0; j < PLANE_TYPES; j++) 27 for (k = 0; k < REF_TYPES; k++) 28 for (l = 0; l < COEF_BANDS; l++) 29 for (m = 0; m < COEFF_CONTEXTS; m++) 30 for (n = 0; n < ENTROPY_TOKENS; n++) 31 td->rd_counts.coef_counts[i][j][k][l][m][n] += 32 td_t->rd_counts.coef_counts[i][j][k][l][m][n]; 33} 34 35static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) { 36 VP9_COMP *const cpi = thread_data->cpi; 37 const VP9_COMMON *const cm = &cpi->common; 38 const int tile_cols = 1 << cm->log2_tile_cols; 39 const int tile_rows = 1 << cm->log2_tile_rows; 40 int t; 41 42 (void) unused; 43 44 for (t = thread_data->start; t < tile_rows * tile_cols; 45 t += cpi->num_workers) { 46 int tile_row = t / tile_cols; 47 int tile_col = t % tile_cols; 48 49 vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col); 50 } 51 52 return 0; 53} 54 55static int get_max_tile_cols(VP9_COMP *cpi) { 56 const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2); 57 int mi_cols = aligned_width >> MI_SIZE_LOG2; 58 int min_log2_tile_cols, max_log2_tile_cols; 59 int log2_tile_cols; 60 61 vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); 62 log2_tile_cols = clamp(cpi->oxcf.tile_columns, 63 min_log2_tile_cols, max_log2_tile_cols); 64 return (1 << log2_tile_cols); 65} 66 67void vp9_encode_tiles_mt(VP9_COMP *cpi) { 68 VP9_COMMON *const cm = &cpi->common; 69 const int tile_cols = 1 << cm->log2_tile_cols; 70 const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); 71 const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols); 72 int i; 73 74 vp9_init_tile_data(cpi); 75 76 // Only run once to create threads and allocate thread data. 77 if (cpi->num_workers == 0) { 78 int allocated_workers = num_workers; 79 80 // While using SVC, we need to allocate threads according to the highest 81 // resolution. 82 if (cpi->use_svc) { 83 int max_tile_cols = get_max_tile_cols(cpi); 84 allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols); 85 } 86 87 CHECK_MEM_ERROR(cm, cpi->workers, 88 vpx_malloc(allocated_workers * sizeof(*cpi->workers))); 89 90 CHECK_MEM_ERROR(cm, cpi->tile_thr_data, 91 vpx_calloc(allocated_workers, 92 sizeof(*cpi->tile_thr_data))); 93 94 for (i = 0; i < allocated_workers; i++) { 95 VPxWorker *const worker = &cpi->workers[i]; 96 EncWorkerData *thread_data = &cpi->tile_thr_data[i]; 97 98 ++cpi->num_workers; 99 winterface->init(worker); 100 101 if (i < allocated_workers - 1) { 102 thread_data->cpi = cpi; 103 104 // Allocate thread data. 105 CHECK_MEM_ERROR(cm, thread_data->td, 106 vpx_memalign(32, sizeof(*thread_data->td))); 107 vp9_zero(*thread_data->td); 108 109 // Set up pc_tree. 110 thread_data->td->leaf_tree = NULL; 111 thread_data->td->pc_tree = NULL; 112 vp9_setup_pc_tree(cm, thread_data->td); 113 114 // Allocate frame counters in thread data. 115 CHECK_MEM_ERROR(cm, thread_data->td->counts, 116 vpx_calloc(1, sizeof(*thread_data->td->counts))); 117 118 // Create threads 119 if (!winterface->reset(worker)) 120 vpx_internal_error(&cm->error, VPX_CODEC_ERROR, 121 "Tile encoder thread creation failed"); 122 } else { 123 // Main thread acts as a worker and uses the thread data in cpi. 124 thread_data->cpi = cpi; 125 thread_data->td = &cpi->td; 126 } 127 128 winterface->sync(worker); 129 } 130 } 131 132 for (i = 0; i < num_workers; i++) { 133 VPxWorker *const worker = &cpi->workers[i]; 134 EncWorkerData *thread_data; 135 136 worker->hook = (VPxWorkerHook)enc_worker_hook; 137 worker->data1 = &cpi->tile_thr_data[i]; 138 worker->data2 = NULL; 139 thread_data = (EncWorkerData*)worker->data1; 140 141 // Before encoding a frame, copy the thread data from cpi. 142 if (thread_data->td != &cpi->td) { 143 thread_data->td->mb = cpi->td.mb; 144 thread_data->td->rd_counts = cpi->td.rd_counts; 145 } 146 if (thread_data->td->counts != &cpi->common.counts) { 147 memcpy(thread_data->td->counts, &cpi->common.counts, 148 sizeof(cpi->common.counts)); 149 } 150 151 // Handle use_nonrd_pick_mode case. 152 if (cpi->sf.use_nonrd_pick_mode) { 153 MACROBLOCK *const x = &thread_data->td->mb; 154 MACROBLOCKD *const xd = &x->e_mbd; 155 struct macroblock_plane *const p = x->plane; 156 struct macroblockd_plane *const pd = xd->plane; 157 PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; 158 int j; 159 160 for (j = 0; j < MAX_MB_PLANE; ++j) { 161 p[j].coeff = ctx->coeff_pbuf[j][0]; 162 p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; 163 pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; 164 p[j].eobs = ctx->eobs_pbuf[j][0]; 165 } 166 } 167 } 168 169 // Encode a frame 170 for (i = 0; i < num_workers; i++) { 171 VPxWorker *const worker = &cpi->workers[i]; 172 EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; 173 174 // Set the starting tile for each thread. 175 thread_data->start = i; 176 177 if (i == cpi->num_workers - 1) 178 winterface->execute(worker); 179 else 180 winterface->launch(worker); 181 } 182 183 // Encoding ends. 184 for (i = 0; i < num_workers; i++) { 185 VPxWorker *const worker = &cpi->workers[i]; 186 winterface->sync(worker); 187 } 188 189 for (i = 0; i < num_workers; i++) { 190 VPxWorker *const worker = &cpi->workers[i]; 191 EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; 192 193 // Accumulate counters. 194 if (i < cpi->num_workers - 1) { 195 vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0); 196 accumulate_rd_opt(&cpi->td, thread_data->td); 197 } 198 } 199} 200