From 753fd86e86ac727dccac88376260b8f54502f2a3 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 30 Aug 2018 09:58:02 -0700 Subject: [PATCH] Revert "Loopfilter MultiThread Optimization" This reverts commit dafe064289a917977439ab6f4f002b9946496084. Corrupted files may cause the decoder to hang as row progress in the loopfilter is used to progress each thread. BUG=webm:1558 Change-Id: I0674ce9af14d3fb7b2da8124e7b600616c8e734a --- vp9/common/vp9_onyxc_int.h | 2 - vp9/common/vp9_thread_common.c | 147 --------------------------------- vp9/common/vp9_thread_common.h | 18 ---- vp9/decoder/vp9_decodeframe.c | 43 ++-------- vp9/decoder/vp9_decoder.h | 2 - vpx_util/vpx_thread.h | 17 ---- 6 files changed, 9 insertions(+), 220 deletions(-) diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index c72b6e64f..690fca796 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -258,8 +258,6 @@ typedef struct VP9Common { PARTITION_CONTEXT *above_seg_context; ENTROPY_CONTEXT *above_context; int above_context_alloc_cols; - - int lf_row; } VP9_COMMON; static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) { diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c index dc9aa405a..e0f5e0d83 100644 --- a/vp9/common/vp9_thread_common.c +++ b/vp9/common/vp9_thread_common.c @@ -229,26 +229,6 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, workers, num_workers, lf_sync); } -void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level, - int num_workers) { - const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - - if (!frame_filter_level) return; - - if (!lf_sync->sync_range || sb_rows != lf_sync->rows || - num_workers > lf_sync->num_workers) { - vp9_loop_filter_dealloc(lf_sync); - vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); - } - - // Initialize cur_sb_col to -1 for all SB rows. - memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); - - memset(lf_sync->num_tiles_done, 0, - sizeof(*lf_sync->num_tiles_done) * sb_rows); - cm->lf_row = 0; -} - // Set up nsync by width. static INLINE int get_sync_range(int width) { // nsync numbers are picked by testing. For example, for 4k @@ -286,25 +266,6 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, pthread_cond_init(&lf_sync->cond_[i], NULL); } } - pthread_mutex_init(&lf_sync->lf_mutex, NULL); - - CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex, - vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows)); - if (lf_sync->recon_done_mutex) { - int i; - for (i = 0; i < rows; ++i) { - pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL); - } - } - - CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond, - vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows)); - if (lf_sync->recon_done_cond) { - int i; - for (i = 0; i < rows; ++i) { - pthread_cond_init(&lf_sync->recon_done_cond[i], NULL); - } - } } #endif // CONFIG_MULTITHREAD @@ -315,11 +276,6 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); - CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done, - vpx_malloc(sizeof(*lf_sync->num_tiles_done) * - mi_cols_aligned_to_sb(cm->mi_rows) >> - MI_BLOCK_SIZE_LOG2)); - // Set up nsync. lf_sync->sync_range = get_sync_range(width); } @@ -342,118 +298,15 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { } vpx_free(lf_sync->cond_); } - if (lf_sync->recon_done_mutex != NULL) { - int i; - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]); - } - vpx_free(lf_sync->recon_done_mutex); - } - - pthread_mutex_destroy(&lf_sync->lf_mutex); - if (lf_sync->recon_done_cond != NULL) { - int i; - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->recon_done_cond[i]); - } - vpx_free(lf_sync->recon_done_cond); - } #endif // CONFIG_MULTITHREAD - vpx_free(lf_sync->lfdata); vpx_free(lf_sync->cur_sb_col); - vpx_free(lf_sync->num_tiles_done); // clear the structure as the source of this call may be a resize in which // case this call will be followed by an _alloc() which may fail. vp9_zero(*lf_sync); } } -static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { - int return_val = -1; - int cur_row; - const int max_rows = cm->mi_rows; - -#if CONFIG_MULTITHREAD - const int tile_cols = 1 << cm->log2_tile_cols; - - pthread_mutex_lock(&lf_sync->lf_mutex); - if (cm->lf_row < max_rows) { - cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; - return_val = cm->lf_row; - cm->lf_row += MI_BLOCK_SIZE; - if (cm->lf_row < max_rows) { - /* If this is not the last row, make sure the next row is also decoded. - * This is because the intra predict has to happen before loop filter */ - cur_row += 1; - } - } - pthread_mutex_unlock(&lf_sync->lf_mutex); - - if (return_val == -1) return return_val; - - pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]); - if (lf_sync->num_tiles_done[cur_row] < tile_cols) { - pthread_cond_wait(&lf_sync->recon_done_cond[cur_row], - &lf_sync->recon_done_mutex[cur_row]); - } - pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]); -#else - (void)lf_sync; - if (cm->lf_row < max_rows) { - cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; - return_val = cm->lf_row; - cm->lf_row += MI_BLOCK_SIZE; - if (cm->lf_row < max_rows) { - /* If this is not the last row, make sure the next row is also decoded. - * This is because the intra predict has to happen before loop filter */ - cur_row += 1; - } - } -#endif // CONFIG_MULTITHREAD - - return return_val; -} - -void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync, - MACROBLOCKD *xd) { - int mi_row; - VP9_COMMON *cm = lf_data->cm; - - while (!xd->corrupted && (mi_row = get_next_row(cm, lf_sync)) != -1 && - mi_row < cm->mi_rows) { - lf_data->start = mi_row; - lf_data->stop = mi_row + MI_BLOCK_SIZE; - - thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only, - lf_sync); - } -} - -void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row) { -#if CONFIG_MULTITHREAD - pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); - lf_sync->num_tiles_done[row] += 1; - if (num_tiles == lf_sync->num_tiles_done[row]) { - if (is_last_row) { - /* The last 2 rows wait on the last row to be done. - * So, we have to broadcast the signal in this case. - */ - pthread_cond_broadcast(&lf_sync->recon_done_cond[row]); - } else { - pthread_cond_signal(&lf_sync->recon_done_cond[row]); - } - } - pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); -#else - (void)lf_sync; - (void)num_tiles; - (void)row; - (void)is_last_row; -#endif // CONFIG_MULTITHREAD -} - // Accumulate frame counts. void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, const FRAME_COUNTS *counts, int is_dec) { diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h index 09609f821..0f7c3ff74 100644 --- a/vp9/common/vp9_thread_common.h +++ b/vp9/common/vp9_thread_common.h @@ -37,13 +37,6 @@ typedef struct VP9LfSyncData { // Row-based parallel loopfilter data LFWorkerData *lfdata; int num_workers; - -#if CONFIG_MULTITHREAD - pthread_mutex_t lf_mutex; - pthread_mutex_t *recon_done_mutex; - pthread_cond_t *recon_done_cond; -#endif - int *num_tiles_done; } VP9LfSync; // Allocate memory for loopfilter row synchronization. @@ -60,17 +53,6 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, int partial_frame, VPxWorker *workers, int num_workers, VP9LfSync *lf_sync); -// Multi-threaded loopfilter initialisations -void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm, - int frame_filter_level, int num_workers); - -void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync, - MACROBLOCKD *xd); - -void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row); - -void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row); - void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, const struct FRAME_COUNTS *counts, int is_dec); diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index b38f868fb..3c17c3dc3 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1487,11 +1487,6 @@ static int tile_worker_hook(void *arg1, void *arg2) { TileInfo *volatile tile = &tile_data->xd.tile; const int final_col = (1 << pbi->common.log2_tile_cols) - 1; const uint8_t *volatile bit_reader_end = NULL; - VP9_COMMON *cm = &pbi->common; - - LFWorkerData *lf_data = tile_data->lf_data; - VP9LfSync *lf_sync = tile_data->lf_sync; - volatile int n = tile_data->buf_start; tile_data->error_info.setjmp = 1; @@ -1524,13 +1519,6 @@ static int tile_worker_hook(void *arg1, void *arg2) { mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); } - if (cm->lf.filter_level && !cm->skip_loop_filter) { - const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); - const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2); - const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2); - vp9_set_row(lf_sync, 1 << cm->log2_tile_cols, - mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row); - } } if (buf->col == final_col) { @@ -1538,11 +1526,6 @@ static int tile_worker_hook(void *arg1, void *arg2) { } } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); - if (!tile_data->xd.corrupted && cm->lf.filter_level && - !cm->skip_loop_filter) { - vp9_loopfilter_rows(lf_data, lf_sync, &tile_data->xd); - } - tile_data->data_end = bit_reader_end; return !tile_data->xd.corrupted; } @@ -1559,8 +1542,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, VP9_COMMON *const cm = &pbi->common; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const uint8_t *bit_reader_end = NULL; - VP9LfSync *lf_row_sync = &pbi->lf_row_sync; - YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -1587,26 +1568,12 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, } } - // Initialize LPF - if (cm->lf.filter_level && !cm->skip_loop_filter) { - vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level, - pbi->num_tile_workers); - } - // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; TileWorkerData *const tile_data = &pbi->tile_worker_data[n + pbi->total_tiles]; winterface->sync(worker); - - if (cm->lf.filter_level && !cm->skip_loop_filter) { - tile_data->lf_sync = lf_row_sync; - tile_data->lf_data = &tile_data->lf_sync->lfdata[n]; - vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane); - tile_data->lf_data->y_only = 0; - } - tile_data->xd = pbi->mb; tile_data->xd.counts = cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; @@ -2128,7 +2095,15 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { // Multi-threaded tile decoder *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); - if (xd->corrupted) { + if (!xd->corrupted) { + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. + vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level, + 0, 0, pbi->tile_workers, pbi->num_tile_workers, + &pbi->lf_row_sync); + } + } else { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. Frame data is corrupted."); } diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 532f5f848..5f22c00cb 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -37,8 +37,6 @@ typedef struct TileWorkerData { int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive vpx_reader bit_reader; FRAME_COUNTS counts; - LFWorkerData *lf_data; - VP9LfSync *lf_sync; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); diff --git a/vpx_util/vpx_thread.h b/vpx_util/vpx_thread.h index 013a6179e..53a5f4966 100644 --- a/vpx_util/vpx_thread.h +++ b/vpx_util/vpx_thread.h @@ -159,23 +159,6 @@ static INLINE int pthread_cond_init(pthread_cond_t *const condition, return 0; } -static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) { - int ok = 1; -#ifdef USE_WINDOWS_CONDITION_VARIABLE - WakeAllConditionVariable(condition); -#else - while (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { - // a thread is waiting in pthread_cond_wait: allow it to be notified - ok &= SetEvent(condition->signal_event_); - // wait until the event is consumed so the signaler cannot consume - // the event via its own pthread_cond_wait. - ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != - WAIT_OBJECT_0); - } -#endif - return !ok; -} - static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { int ok = 1; #ifdef USE_WINDOWS_CONDITION_VARIABLE -- 2.40.0