2 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
13 #include "vp9/encoder/vp9_encoder.h"
14 #include "vp9/encoder/vp9_ethread.h"
15 #include "vp9/encoder/vp9_multi_thread.h"
17 void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
// Pops the next job from the given tile's job queue and returns a pointer
// to its JobNode; job_info stays NULL when the queue is empty.
// NOTE(review): this excerpt is gappy — the remainder of the parameter list
// (the tile id argument used below), the #endif lines, the empty-queue
// check, and the final return are on lines not visible here.
19 RowMTInfo *row_mt_info;
20 JobQueueHandle *job_queue_hdl = NULL;
22 JobNode *job_info = NULL;
23 #if CONFIG_MULTITHREAD
24 pthread_mutex_t *mutex_handle = NULL;
// Look up the per-tile row-mt context and its queue handle.
27 row_mt_info = (RowMTInfo *)(&multi_thread_ctxt->row_mt_info[tile_id]);
28 job_queue_hdl = (JobQueueHandle *)&row_mt_info->job_queue_hdl;
29 #if CONFIG_MULTITHREAD
30 mutex_handle = &row_mt_info->job_mutex;
33 // lock the mutex for queue access
34 #if CONFIG_MULTITHREAD
35 pthread_mutex_lock(mutex_handle);
37 next = job_queue_hdl->next;
// Non-empty queue: take the head node's job, advance the handle to the
// following node, and count this job as acquired.
39 JobQueue *job_queue = (JobQueue *)next;
40 job_info = &job_queue->job_info;
41 // Update the next job in the queue
42 job_queue_hdl->next = job_queue->next;
43 job_queue_hdl->num_jobs_acquired++;
46 #if CONFIG_MULTITHREAD
47 pthread_mutex_unlock(mutex_handle);
53 void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
// Allocates the encoder's row-based multi-threading resources: the shared
// job queue, per-tile job mutexes, per-tile row sync memory, and (when the
// adaptive-RD speed feature is on) per-row RD threshold tables.
// NOTE(review): gappy excerpt — #endif lines, closing braces, and some
// declarations (e.g. the loop index i) are not visible in this view.
54 struct VP9Common *cm = &cpi->common;
55 MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
56 int tile_row, tile_col;
57 const int tile_cols = 1 << cm->log2_tile_cols;
58 const int tile_rows = 1 << cm->log2_tile_rows;
59 const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
60 int jobs_per_tile_col, total_jobs;
// One job per row unit; take the max of MB rows and SB rows so the
// allocation covers both granularities used elsewhere in this file.
62 jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows);
63 // Calculate the total number of jobs
64 total_jobs = jobs_per_tile_col * tile_cols;
// Record the allocated geometry so dealloc can walk the same extents even
// if the tile configuration changes before teardown.
66 multi_thread_ctxt->allocated_tile_cols = tile_cols;
67 multi_thread_ctxt->allocated_tile_rows = tile_rows;
68 multi_thread_ctxt->allocated_vert_unit_rows = jobs_per_tile_col;
// 32-byte-aligned allocation of the whole job queue buffer.
70 multi_thread_ctxt->job_queue =
71 (JobQueue *)vpx_memalign(32, total_jobs * sizeof(JobQueue));
73 #if CONFIG_MULTITHREAD
74 // Create mutex for each tile
75 for (tile_col = 0; tile_col < tile_cols; tile_col++) {
76 RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
77 pthread_mutex_init(&row_mt_info->job_mutex, NULL);
81 // Allocate memory for row based multi-threading
82 for (tile_col = 0; tile_col < tile_cols; tile_col++) {
83 TileDataEnc *this_tile = &cpi->tile_data[tile_col];
84 vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col);
85 if (cpi->sf.adaptive_rd_thresh_row_mt) {
// NOTE(review): the left-hand side of this assignment is on a line not
// visible in this excerpt.
87 (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
// Per-row RD threshold table, zero-allocated then seeded with the
// default factor for every (row, block size, mode) entry.
89 this_tile->row_base_thresh_freq_fact =
90 (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES,
91 sizeof(*(this_tile->row_base_thresh_freq_fact)));
92 for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++)
93 this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT;
97 // Assign the sync pointer of tile row zero for every tile row > 0
98 for (tile_row = 1; tile_row < tile_rows; tile_row++) {
99 for (tile_col = 0; tile_col < tile_cols; tile_col++) {
100 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
101 TileDataEnc *this_col_tile = &cpi->tile_data[tile_col];
102 this_tile->row_mt_sync = this_col_tile->row_mt_sync;
106 // Calculate the number of vertical units in the given tile row
107 for (tile_row = 0; tile_row < tile_rows; tile_row++) {
108 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols];
109 TileInfo *tile_info = &this_tile->tile_info;
110 multi_thread_ctxt->num_tile_vert_sbs[tile_row] =
111 get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
// Allocate and initialize the per-tile mutexes used during encoding.
114 #if CONFIG_MULTITHREAD
115 for (tile_row = 0; tile_row < tile_rows; tile_row++) {
116 for (tile_col = 0; tile_col < tile_cols; tile_col++) {
117 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
119 CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
120 vpx_malloc(sizeof(*this_tile->search_count_mutex)));
122 pthread_mutex_init(this_tile->search_count_mutex, NULL);
124 CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
125 vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
127 pthread_mutex_init(this_tile->enc_row_mt_mutex, NULL);
133 void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
// Releases everything vp9_row_mt_mem_alloc() set up: the job queue buffer,
// the per-tile job mutexes, the row sync memory, the RD threshold tables,
// and the per-tile encode mutexes. Walks the allocated_* extents recorded
// at allocation time, not the current tile configuration.
// NOTE(review): gappy excerpt — loop headers are split across missing
// lines and #endif / closing braces are not visible.
134 MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
136 #if CONFIG_MULTITHREAD
140 // Deallocate memory for job queue
141 if (multi_thread_ctxt->job_queue) vpx_free(multi_thread_ctxt->job_queue);
143 #if CONFIG_MULTITHREAD
144 // Destroy mutex for each tile
145 for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
// row_mt_info is the address of an array element, so the NULL check below
// is defensive only.
147 RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
148 if (row_mt_info) pthread_mutex_destroy(&row_mt_info->job_mutex);
152 // Free row based multi-threading sync memory
153 for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
155 TileDataEnc *this_tile = &cpi->tile_data[tile_col];
156 vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
159 #if CONFIG_MULTITHREAD
160 for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
162 for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
164 TileDataEnc *this_tile =
165 &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
167 if (cpi->sf.adaptive_rd_thresh_row_mt) {
168 if (this_tile->row_base_thresh_freq_fact != NULL) {
169 vpx_free(this_tile->row_base_thresh_freq_fact);
170 this_tile->row_base_thresh_freq_fact = NULL;
// Destroy then free each per-tile mutex, clearing the pointer afterwards
// so stale references are detectable.
173 pthread_mutex_destroy(this_tile->search_count_mutex);
174 vpx_free(this_tile->search_count_mutex);
175 this_tile->search_count_mutex = NULL;
176 pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
177 vpx_free(this_tile->enc_row_mt_mutex);
178 this_tile->enc_row_mt_mutex = NULL;
184 void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
// Per-frame initialization of each tile column: resets the row-mt sync
// columns and the tile's first-pass stats.
// NOTE(review): gappy excerpt — the declaration of i and closing braces
// are on lines not visible here.
185 VP9_COMMON *const cm = &cpi->common;
186 const int tile_cols = 1 << cm->log2_tile_cols;
187 const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
190 for (i = 0; i < tile_cols; i++) {
191 TileDataEnc *this_tile = &cpi->tile_data[i];
// First pass (pass == 1) works at MB-row granularity; otherwise SB rows.
192 int jobs_per_tile_col = cpi->oxcf.pass == 1 ? cm->mb_rows : sb_rows;
194 // Initialize cur_col to -1 for all rows.
// (memset of 0xFF bytes yields -1 in each int element.)
195 memset(this_tile->row_mt_sync.cur_col, -1,
196 sizeof(*this_tile->row_mt_sync.cur_col) * jobs_per_tile_col);
197 vp9_zero(this_tile->fp_data);
198 this_tile->fp_data.image_data_start_row = INVALID_ROW;
202 void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
203 int tile_cols, int num_workers) {
// Maps worker threads to tile columns round-robin, wrapping tile_id back
// to zero after the last tile, so worker i handles tile (i % tile_cols).
// NOTE(review): the declarations of i and tile_id are on lines not visible
// in this excerpt; tile_id presumably starts at 0 — confirm upstream.
207 // Allocating the threads for the tiles
208 for (i = 0; i < num_workers; i++) {
209 multi_thread_ctxt->thread_id_to_tile_id[i] = tile_id++;
210 if (tile_id == tile_cols) tile_id = 0;
214 int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt,
// Returns the number of jobs not yet acquired from the given tile's queue,
// reading the counter under the tile's job mutex when CONFIG_MULTITHREAD.
// NOTE(review): gappy excerpt — the cur_tile_id parameter line, the #endif
// lines, and the left-hand side of the num_jobs_remaining assignment are
// not visible here.
216 RowMTInfo *row_mt_info;
217 JobQueueHandle *job_queue_hndl;
218 #if CONFIG_MULTITHREAD
219 pthread_mutex_t *mutex;
221 int num_jobs_remaining;
223 row_mt_info = &multi_thread_ctxt->row_mt_info[cur_tile_id];
224 job_queue_hndl = &row_mt_info->job_queue_hdl;
225 #if CONFIG_MULTITHREAD
226 mutex = &row_mt_info->job_mutex;
229 #if CONFIG_MULTITHREAD
230 pthread_mutex_lock(mutex);
// Remaining = total jobs per tile column minus jobs already handed out.
233 multi_thread_ctxt->jobs_per_tile_col - job_queue_hndl->num_jobs_acquired;
234 #if CONFIG_MULTITHREAD
235 pthread_mutex_unlock(mutex);
238 return (num_jobs_remaining);
241 void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
// Rebuilds the per-tile job queues for a frame: zeroes the queue buffer,
// links one job node per vertical row unit in each tile column, points
// each tile's queue handle at its first node, and resets per-worker
// tile-completion bookkeeping.
// NOTE(review): gappy excerpt — declarations of tile_col/tile_row/i, some
// conditional bodies, and closing braces are on lines not visible here.
242 VP9_COMMON *const cm = &cpi->common;
243 MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
244 JobQueue *job_queue = multi_thread_ctxt->job_queue;
245 const int tile_cols = 1 << cm->log2_tile_cols;
246 int job_row_num, jobs_per_tile, jobs_per_tile_col, total_jobs;
247 const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
// ENCODE_JOB works at SB-row granularity; other job types at MB rows.
250 jobs_per_tile_col = (job_type != ENCODE_JOB) ? cm->mb_rows : sb_rows;
251 total_jobs = jobs_per_tile_col * tile_cols;
253 multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;
254 // memset the entire job queue buffer to zero
255 memset(job_queue, 0, total_jobs * sizeof(JobQueue));
257 // Job queue preparation
258 for (tile_col = 0; tile_col < tile_cols; tile_col++) {
259 RowMTInfo *tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col];
260 JobQueue *job_queue_curr, *job_queue_temp;
// Point the tile's handle at the first node of its slice of the buffer
// and reset the acquired-jobs counter.
263 tile_ctxt->job_queue_hdl.next = (void *)job_queue;
264 tile_ctxt->job_queue_hdl.num_jobs_acquired = 0;
266 job_queue_curr = job_queue;
267 job_queue_temp = job_queue;
269 // loop over all the vertical rows
270 for (job_row_num = 0, jobs_per_tile = 0; job_row_num < jobs_per_tile_col;
271 job_row_num++, jobs_per_tile++) {
// Fill in this node's job description and chain it to the next node.
272 job_queue_curr->job_info.vert_unit_row_num = job_row_num;
273 job_queue_curr->job_info.tile_col_id = tile_col;
274 job_queue_curr->job_info.tile_row_id = tile_row;
275 job_queue_curr->next = (void *)(job_queue_temp + 1);
276 job_queue_curr = ++job_queue_temp;
278 if (ENCODE_JOB == job_type) {
// NOTE(review): the condition's left-hand side and the body of this
// if are on lines not visible in this excerpt.
280 multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) {
287 // Set the last pointer to NULL
288 job_queue_curr += -1;
289 job_queue_curr->next = (void *)NULL;
291 // Move to the next tile
292 job_queue += jobs_per_tile_col;
// Reset each worker's thread id and per-tile completion flags.
295 for (i = 0; i < cpi->num_workers; i++) {
296 EncWorkerData *thread_data;
297 thread_data = &cpi->tile_thr_data[i];
298 thread_data->thread_id = i;
300 for (tile_col = 0; tile_col < tile_cols; tile_col++)
301 thread_data->tile_completion_status[tile_col] = 0;
305 int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
306 int *tile_completion_status, int *cur_tile_id,
// Scans all tiles, marks finished ones complete, and switches *cur_tile_id
// to the tile with the most jobs remaining (i.e. the least processed one).
// NOTE(review): this definition runs past the excerpt — the rest of the
// parameter list, the body recording tile_id inside the inner if, the
// all-done early return, and the function's final return are not visible.
309 int tile_id = -1;  // Stores the tile ID with minimum proc done
310 int max_num_jobs_remaining = 0;
311 int num_jobs_remaining;
313 // Mark the completion to avoid check in the loop
314 tile_completion_status[*cur_tile_id] = 1;
315 // Check for the status of all the tiles
316 for (tile_col = 0; tile_col < tile_cols; tile_col++) {
317 if (tile_completion_status[tile_col] == 0) {
// NOTE(review): the left-hand side of this call's assignment is on a
// line not visible in this excerpt.
319 vp9_get_job_queue_status(multi_thread_ctxt, tile_col);
320 // Mark the completion to avoid checks during future switches across tiles
321 if (num_jobs_remaining == 0) tile_completion_status[tile_col] = 1;
322 if (num_jobs_remaining > max_num_jobs_remaining) {
323 max_num_jobs_remaining = num_jobs_remaining;
332 // Update the cur ID to the next tile ID that will be processed,
333 // which will be the least processed tile
334 *cur_tile_id = tile_id;