From 351ec07a6da930b5751cc84586f15ed65f1f9033 Mon Sep 17 00:00:00 2001
From: Ritu Baldwa
Date: Wed, 10 Oct 2018 16:25:51 +0530
Subject: [PATCH] Add Memory to Enable Row Decode

Row based multi-thread needs extra memory to store
the parsed co-efficients, partitions and eob.
This commit adds memory for the same.

Change-Id: I13fa4a6ada2ec3048bc973e465055b832429388f
---
Review revision: num_sbs is recorded only after every allocation has
succeeded and is reset when the buffers are freed; the dqcoeff buffer size is
computed in size_t; plane loops use MAX_MB_PLANE. The blob ids on the "index"
lines predate this revision.

 vp9/decoder/vp9_decodeframe.c | 25 +++++++++++++++
 vp9/decoder/vp9_decoder.c     | 58 +++++++++++++++++++++++++++++++++++
 vp9/decoder/vp9_decoder.h     | 21 +++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 95e376d04..bc0fc6197 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1984,6 +1984,31 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
   setup_segmentation_dequant(cm);
 
   setup_tile_info(cm, rb);
+  if (pbi->row_mt == 1) {
+    // Row-MT buffers are sized per superblock: one superblock's worth, or
+    // the whole frame's when more than one thread may be used.
+    int num_sbs = 1;
+
+    if (pbi->row_mt_worker_data == NULL) {
+      CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data,
+                      vpx_calloc(1, sizeof(*pbi->row_mt_worker_data)));
+    }
+
+    if (pbi->max_threads > 1) {
+      const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+      const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2;
+      const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+      const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
+
+      num_sbs = sb_cols * sb_rows;
+    }
+
+    // Grow only; buffers that are already large enough are reused.
+    if (num_sbs > pbi->row_mt_worker_data->num_sbs) {
+      vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+      vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs);
+    }
+  }
   sz = vpx_rb_read_literal(rb, 16);
 
   if (sz == 0)
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 5e41274cc..1e2a44293 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -55,6 +55,60 @@ static void vp9_dec_setup_mi(VP9_COMMON *cm) {
          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 }
 
+// Allocates the row-MT buffers (dequantized coefficients and eobs for each
+// plane, partition types and the recon map) sized for num_sbs superblocks.
+// The buffer pointers are overwritten without being freed, so callers must
+// release earlier buffers with vp9_dec_free_row_mt_mem() first. Failures are
+// reported through CHECK_MEM_ERROR on cm.
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+                              VP9_COMMON *cm, int num_sbs) {
+  int plane;
+  // Widen before shifting so that a large superblock count cannot overflow
+  // int arithmetic.
+  const size_t dqcoeff_size = ((size_t)num_sbs << DQCOEFFS_PER_SB_LOG2) *
+                              sizeof(*row_mt_worker_data->dqcoeff[0]);
+  // Advertise no capacity until every buffer exists. If an allocation fails
+  // part-way (NOTE(review): CHECK_MEM_ERROR presumably does not return then;
+  // confirm), the caller's next size check must retry instead of trusting
+  // NULL buffers.
+  row_mt_worker_data->num_sbs = 0;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
+                    vpx_memalign(16, dqcoeff_size));
+    memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
+    CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
+                    vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
+                               sizeof(*row_mt_worker_data->eob[plane])));
+  }
+  CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
+                  vpx_calloc(num_sbs * PARTITIONS_PER_SB,
+                             sizeof(*row_mt_worker_data->partition)));
+  CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
+                  vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));
+  row_mt_worker_data->num_sbs = num_sbs;
+}
+
+// Releases every buffer owned by row_mt_worker_data and marks it empty. The
+// struct itself is not freed. A NULL argument is ignored.
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) {
+  if (row_mt_worker_data != NULL) {
+    int plane;
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+      vpx_free(row_mt_worker_data->eob[plane]);
+      row_mt_worker_data->eob[plane] = NULL;
+      vpx_free(row_mt_worker_data->dqcoeff[plane]);
+      row_mt_worker_data->dqcoeff[plane] = NULL;
+    }
+    vpx_free(row_mt_worker_data->partition);
+    row_mt_worker_data->partition = NULL;
+    vpx_free(row_mt_worker_data->recon_map);
+    row_mt_worker_data->recon_map = NULL;
+    // Nothing is allocated any more, so the recorded capacity must not
+    // suppress a later reallocation.
+    row_mt_worker_data->num_sbs = 0;
+  }
+}
+
 static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
   if (!cm->mip) return 1;
@@ -140,6 +194,10 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
     vp9_loop_filter_dealloc(&pbi->lf_row_sync);
   }
 
+  if (pbi->row_mt == 1) {
+    vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+    vpx_free(pbi->row_mt_worker_data);
+  }
   vp9_remove_common(&pbi->common);
   vpx_free(pbi);
 }
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 425c8964c..5354105f8 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -26,6 +26,11 @@
 extern "C" {
 #endif
 
+// Per-superblock element counts used to size the row-MT buffers.
+#define EOBS_PER_SB_LOG2 8
+#define DQCOEFFS_PER_SB_LOG2 12
+#define PARTITIONS_PER_SB 85
+
 typedef struct TileBuffer {
   const uint8_t *data;
   size_t size;
@@ -45,6 +50,15 @@ typedef struct TileWorkerData {
   struct vpx_internal_error_info error_info;
 } TileWorkerData;
 
+// Buffers holding parsed data for row-based multi-threaded decode.
+typedef struct RowMTWorkerData {
+  int num_sbs;  // Number of superblocks the buffers below are sized for.
+  int *eob[MAX_MB_PLANE];
+  PARTITION_TYPE *partition;
+  tran_low_t *dqcoeff[MAX_MB_PLANE];
+  int8_t *recon_map;
+} RowMTWorkerData;
+
 typedef struct VP9Decoder {
   DECLARE_ALIGNED(16, MACROBLOCKD, mb);
 
@@ -77,6 +91,7 @@ typedef struct VP9Decoder {
 
   int row_mt;
   int lpf_mt_opt;
+  RowMTWorkerData *row_mt_worker_data;
 } VP9Decoder;
 
 int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size,
@@ -114,6 +129,12 @@ struct VP9Decoder *vp9_decoder_create(BufferPool *const pool);
 
 void vp9_decoder_remove(struct VP9Decoder *pbi);
 
+// Allocate and release the buffers of a RowMTWorkerData, sized for num_sbs
+// superblocks. Neither function allocates or frees the struct itself.
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+                              VP9_COMMON *cm, int num_sbs);
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data);
+
 static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
                                       BufferPool *const pool) {
   if (idx >= 0 && frame_bufs[idx].ref_count > 0) {
-- 
2.40.0