workers, num_workers, lf_sync);
}
-void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level,
- int num_workers) {
- const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
-
- if (!frame_filter_level) return;
-
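- // (Re)allocate the sync state on first use, after a resize (the
- // superblock-row count changed), or when more workers are in use than
- // were allocated for.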
- if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
- num_workers > lf_sync->num_workers) {
- vp9_loop_filter_dealloc(lf_sync);
- vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
- }
-
- // Initialize cur_sb_col to -1 for all SB rows.
- memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
-
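- // Reset the per-row tile-completion counters and the shared row cursor
- // so filtering of the new frame starts from row 0.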
- memset(lf_sync->num_tiles_done, 0,
- sizeof(*lf_sync->num_tiles_done) * sb_rows);
- cm->lf_row = 0;
-}
-
// Set up nsync by width.
static INLINE int get_sync_range(int width) {
// nsync numbers are picked by testing. For example, for 4k
pthread_cond_init(&lf_sync->cond_[i], NULL);
}
}
- pthread_mutex_init(&lf_sync->lf_mutex, NULL);
-
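- // One mutex/condvar pair per superblock row: tile workers signal a
- // row's condvar once their tile column has finished reconstructing it.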
- CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
- vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
- if (lf_sync->recon_done_mutex) {
- int i;
- for (i = 0; i < rows; ++i) {
- pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL);
- }
- }
-
- CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
- vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
- if (lf_sync->recon_done_cond) {
- int i;
- for (i = 0; i < rows; ++i) {
- pthread_cond_init(&lf_sync->recon_done_cond[i], NULL);
- }
- }
}
#endif // CONFIG_MULTITHREAD
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
- CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
- vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
- (mi_cols_aligned_to_sb(cm->mi_rows) >>
- MI_BLOCK_SIZE_LOG2)));
-
// Set up nsync.
lf_sync->sync_range = get_sync_range(width);
}
}
vpx_free(lf_sync->cond_);
}
- if (lf_sync->recon_done_mutex != NULL) {
- int i;
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
- }
- vpx_free(lf_sync->recon_done_mutex);
- }
-
- pthread_mutex_destroy(&lf_sync->lf_mutex);
- if (lf_sync->recon_done_cond != NULL) {
- int i;
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
- }
- vpx_free(lf_sync->recon_done_cond);
- }
#endif // CONFIG_MULTITHREAD
-
vpx_free(lf_sync->lfdata);
vpx_free(lf_sync->cur_sb_col);
- vpx_free(lf_sync->num_tiles_done);
// Clear the structure: this call may originate from a resize, in which case
// it will be followed by an _alloc() that may fail.
vp9_zero(*lf_sync);
}
}
-static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
- int return_val = -1;
- int cur_row;
- const int max_rows = cm->mi_rows;
-
-#if CONFIG_MULTITHREAD
- const int tile_cols = 1 << cm->log2_tile_cols;
-
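- // cm->lf_row is a shared cursor that hands out superblock rows to
- // filter; lf_mutex serializes claims on it.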
- pthread_mutex_lock(&lf_sync->lf_mutex);
- if (cm->lf_row < max_rows) {
- cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
- return_val = cm->lf_row;
- cm->lf_row += MI_BLOCK_SIZE;
- if (cm->lf_row < max_rows) {
- /* If this is not the last row, also wait on the row below: intra
- * prediction there reads this row's unfiltered pixels, so it must be
- * fully reconstructed before this row is filtered. */
- cur_row += 1;
- }
- }
- pthread_mutex_unlock(&lf_sync->lf_mutex);
-
- if (return_val == -1) return return_val;
-
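- // Block until every tile column has reconstructed cur_row
- // (vp9_set_row() signals as tile workers finish); loop because
- // condition waits may wake spuriously.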
- pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]);
- while (lf_sync->num_tiles_done[cur_row] < tile_cols) {
- pthread_cond_wait(&lf_sync->recon_done_cond[cur_row],
- &lf_sync->recon_done_mutex[cur_row]);
- }
- pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
-#else
- (void)lf_sync;
- if (cm->lf_row < max_rows) {
- cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
- return_val = cm->lf_row;
- cm->lf_row += MI_BLOCK_SIZE;
- if (cm->lf_row < max_rows) {
- /* If this is not the last row, also account for the row below, since
- * intra prediction there reads this row's unfiltered pixels. */
- cur_row += 1;
- }
- }
-#endif // CONFIG_MULTITHREAD
-
- return return_val;
-}
-
-void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync,
- MACROBLOCKD *xd) {
- int mi_row;
- VP9_COMMON *cm = lf_data->cm;
-
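- // Keep claiming the next unfiltered superblock row until the frame is
- // done or the decoder has flagged corruption.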
- while (!xd->corrupted && (mi_row = get_next_row(cm, lf_sync)) != -1 &&
- mi_row < cm->mi_rows) {
- lf_data->start = mi_row;
- lf_data->stop = mi_row + MI_BLOCK_SIZE;
-
- thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
- lf_data->start, lf_data->stop, lf_data->y_only,
- lf_sync);
- }
-}
-
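-// Called by a tile worker when it finishes reconstructing a superblock row in
-// its tile column; once all tile columns have checked in, the row becomes
-// eligible for loop filtering.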
-void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row) {
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
- lf_sync->num_tiles_done[row] += 1;
- if (num_tiles == lf_sync->num_tiles_done[row]) {
- if (is_last_row) {
- /* Two jobs wait on the final row: the one filtering the row above it
- * and the one filtering the final row itself, so the signal must be
- * broadcast to wake both. */
- pthread_cond_broadcast(&lf_sync->recon_done_cond[row]);
- } else {
- pthread_cond_signal(&lf_sync->recon_done_cond[row]);
- }
- }
- pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]);
-#else
- (void)lf_sync;
- (void)num_tiles;
- (void)row;
- (void)is_last_row;
-#endif // CONFIG_MULTITHREAD
-}
-
// Accumulate frame counts.
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
const FRAME_COUNTS *counts, int is_dec) {
TileInfo *volatile tile = &tile_data->xd.tile;
const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
const uint8_t *volatile bit_reader_end = NULL;
- VP9_COMMON *cm = &pbi->common;
-
- LFWorkerData *lf_data = tile_data->lf_data;
- VP9LfSync *lf_sync = tile_data->lf_sync;
-
volatile int n = tile_data->buf_start;
tile_data->error_info.setjmp = 1;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
}
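- // A superblock row of this tile is fully reconstructed; report it so
- // the loop-filter workers can make progress.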
- if (cm->lf.filter_level && !cm->skip_loop_filter) {
- const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
- const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
- const int is_last_row = (sb_rows - 1 == (mi_row >> MI_BLOCK_SIZE_LOG2));
- vp9_set_row(lf_sync, 1 << cm->log2_tile_cols,
- mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row);
- }
}
if (buf->col == final_col) {
}
} while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
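- // All tiles assigned to this worker are decoded; join the other
- // workers in row-synchronized loop filtering.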
- if (!tile_data->xd.corrupted && cm->lf.filter_level &&
- !cm->skip_loop_filter) {
- vp9_loopfilter_rows(lf_data, lf_sync, &tile_data->xd);
- }
-
tile_data->data_end = bit_reader_end;
return !tile_data->xd.corrupted;
}
VP9_COMMON *const cm = &pbi->common;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const uint8_t *bit_reader_end = NULL;
- VP9LfSync *lf_row_sync = &pbi->lf_row_sync;
- YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
}
}
- // Initialize the loop-filter row-sync state for this frame.
- if (cm->lf.filter_level && !cm->skip_loop_filter) {
- vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level,
- pbi->num_tile_workers);
- }
-
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
TileWorkerData *const tile_data =
&pbi->tile_worker_data[n + pbi->total_tiles];
winterface->sync(worker);
-
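- // Hand each tile worker its own loop-filter job context; all contexts
- // share the row-sync state in lf_row_sync.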
- if (cm->lf.filter_level && !cm->skip_loop_filter) {
- tile_data->lf_sync = lf_row_sync;
- tile_data->lf_data = &tile_data->lf_sync->lfdata[n];
- vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane);
- tile_data->lf_data->y_only = 0;
- }
-
tile_data->xd = pbi->mb;
tile_data->xd.counts =
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
- if (xd->corrupted) {
+ if (!xd->corrupted) {
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, reuse those threads for
+ // parallel loop filtering.
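+ // Here y_only = 0 and partial_frame = 0: all planes are filtered over
+ // the whole frame.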
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
+ 0, 0, pbi->tile_workers, pbi->num_tile_workers,
+ &pbi->lf_row_sync);
+ }
+ } else {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data is corrupted.");
}