From 12ec948490c61bf234febc84196216442529180b Mon Sep 17 00:00:00 2001 From: Ranjit Kumar Tulabandu Date: Thu, 2 Feb 2017 14:19:29 +0530 Subject: [PATCH] Changes to facilitate multi-threading of encoding stage Modified the encoding stage to have row level entry points with relevant initializations and to access the token information at row level Change-Id: Ife10e55a7c1a420ee906d711caf75002688d9e39 --- vp9/encoder/vp9_bitstream.c | 34 ++++++++++++------------ vp9/encoder/vp9_bitstream.h | 2 -- vp9/encoder/vp9_encodeframe.c | 49 +++++++++++++++++++++++++++-------- vp9/encoder/vp9_encoder.c | 10 +++++++ vp9/encoder/vp9_encoder.h | 21 +++++++++++++++ 5 files changed, 86 insertions(+), 30 deletions(-) diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 49aea69eb..43c5eaed0 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -484,23 +484,31 @@ static void write_modes_sb(VP9_COMP *cpi, MACROBLOCKD *const xd, } static void write_modes(VP9_COMP *cpi, MACROBLOCKD *const xd, - const TileInfo *const tile, vpx_writer *w, - TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, - unsigned int *const max_mv_magnitude, + const TileInfo *const tile, vpx_writer *w, int tile_row, + int tile_col, unsigned int *const max_mv_magnitude, int interp_filter_selected[MAX_REF_FRAMES] [SWITCHABLE]) { const VP9_COMMON *const cm = &cpi->common; - int mi_row, mi_col; + int mi_row, mi_col, tile_sb_row; + TOKENEXTRA *tok = NULL; + TOKENEXTRA *tok_end = NULL; set_partition_probs(cm, xd); for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { + tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile->mi_row_start) >> + MI_BLOCK_SIZE_LOG2; + tok = cpi->tplist[tile_row][tile_col][tile_sb_row].start; + tok_end = tok + cpi->tplist[tile_row][tile_col][tile_sb_row].count; + vp9_zero(xd->left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, + write_modes_sb(cpi, xd, tile, w, &tok, tok_end, mi_row, mi_col, BLOCK_64X64, max_mv_magnitude, interp_filter_selected); + + assert(tok == cpi->tplist[tile_row][tile_col][tile_sb_row].stop); } } @@ -919,9 +927,8 @@ static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) { MACROBLOCKD *const xd = &data->xd; vpx_start_encode(&data->bit_writer, data->dest); write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info, - &data->bit_writer, &data->tok, data->tok_end, - &data->max_mv_magnitude, data->interp_filter_selected); - assert(data->tok == data->tok_end); + &data->bit_writer, 0, data->tile_idx, &data->max_mv_magnitude, + data->interp_filter_selected); vpx_stop_encode(&data->bit_writer); return 1; } @@ -978,8 +985,6 @@ static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) { // Populate the worker data. data->xd = cpi->td.mb.e_mbd; data->tile_idx = tile_col; - data->tok = cpi->tile_tok[0][tile_col]; - data->tok_end = cpi->tile_tok[0][tile_col] + cpi->tok_count[0][tile_col]; data->max_mv_magnitude = cpi->max_mv_magnitude; memset(data->interp_filter_selected, 0, sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE); @@ -1039,7 +1044,6 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; vpx_writer residual_bc; int tile_row, tile_col; - TOKENEXTRA *tok_end; size_t total_size = 0; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -1058,10 +1062,6 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { for (tile_row = 0; tile_row < tile_rows; tile_row++) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { int tile_idx = tile_row * tile_cols + tile_col; - TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; - - tok_end = cpi->tile_tok[tile_row][tile_col] + - cpi->tok_count[tile_row][tile_col]; if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) vpx_start_encode(&residual_bc, data_ptr + total_size + 4); @@ -1069,9 +1069,9 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { vpx_start_encode(&residual_bc, data_ptr + total_size); write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc, - &tok, tok_end, &cpi->max_mv_magnitude, + tile_row, tile_col, &cpi->max_mv_magnitude, cpi->interp_filter_selected); - assert(tok == tok_end); + vpx_stop_encode(&residual_bc); if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) { // size of this tile diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h index 044a3bbc7..339c3fecb 100644 --- a/vp9/encoder/vp9_bitstream.h +++ b/vp9/encoder/vp9_bitstream.h @@ -20,8 +20,6 @@ extern "C" { typedef struct VP9BitstreamWorkerData { uint8_t *dest; int dest_size; - TOKENEXTRA *tok; - TOKENEXTRA *tok_end; vpx_writer bit_writer; int tile_idx; unsigned int max_mv_magnitude; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index bf3238967..c16901992 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -4078,7 +4078,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) { const int tile_rows = 1 << cm->log2_tile_rows; int tile_col, tile_row; TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; + TOKENLIST *tplist = cpi->tplist[0][0]; int tile_tok = 0; + int tplist_count = 0; if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { if (cpi->tile_data != NULL) vpx_free(cpi->tile_data); @@ -4109,17 +4111,50 @@ void vp9_init_tile_data(VP9_COMP *cpi) { cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; pre_tok = cpi->tile_tok[tile_row][tile_col]; tile_tok = allocated_tokens(*tile_info); + + cpi->tplist[tile_row][tile_col] = tplist + tplist_count; + tplist = cpi->tplist[tile_row][tile_col]; + tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); } } } +void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row, + int tile_col, int mi_row) { + VP9_COMMON *const cm = &cpi->common; + const int tile_cols = 1 << cm->log2_tile_cols; + TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; + const TileInfo *const tile_info = &this_tile->tile_info; + TOKENEXTRA *tok = NULL; + int tile_sb_row; + int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; + + tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >> + MI_BLOCK_SIZE_LOG2; + get_start_tok(cpi, tile_row, tile_col, mi_row, &tok); + cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok; + + if (cpi->sf.use_nonrd_pick_mode) + encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); + else + encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); + + cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok; + cpi->tplist[tile_row][tile_col][tile_sb_row].count = + (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop - + cpi->tplist[tile_row][tile_col][tile_sb_row].start); + assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <= + get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols)); + + (void)tile_mb_cols; +} + void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, int tile_col) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; const TileInfo *const tile_info = &this_tile->tile_info; - TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; const int mi_row_start = tile_info->mi_row_start; const int mi_row_end = tile_info->mi_row_end; int mi_row; @@ -4130,16 +4165,8 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, td->mb.m_search_count_ptr = &this_tile->m_search_count; td->mb.ex_search_count_ptr = &this_tile->ex_search_count; - for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode) - encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); - else - encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); - } - cpi->tok_count[tile_row][tile_col] = - (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]); - assert(tok - cpi->tile_tok[tile_row][tile_col] <= - allocated_tokens(*tile_info)); + for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) + vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); } static void encode_tiles(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 806f07a60..0dce44c66 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -495,6 +495,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->tile_tok[0][0]); cpi->tile_tok[0][0] = 0; + vpx_free(cpi->tplist[0][0]); + cpi->tplist[0][0] = NULL; + vp9_free_pc_tree(&cpi->td); for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { @@ -829,6 +832,7 @@ static int alloc_context_buffers_ext(VP9_COMP *cpi) { static void alloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; + int sb_rows; vp9_alloc_context_buffers(cm, cm->width, cm->height); @@ -842,6 +846,12 @@ static void alloc_compressor_data(VP9_COMP *cpi) { vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0]))); } + sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + vpx_free(cpi->tplist[0][0]); + CHECK_MEM_ERROR( + cm, cpi->tplist[0][0], + vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0]))); + vp9_setup_pc_tree(&cpi->common, &cpi->td); } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index e1046f14a..91825395f 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -284,6 +284,12 @@ typedef struct RowMTInfo { #endif } RowMTInfo; +typedef struct { + TOKENEXTRA *start; + TOKENEXTRA *stop; + unsigned int count; +} TOKENLIST; + typedef struct MultiThreadHandle { int allocated_tile_rows; int allocated_tile_cols; @@ -470,6 +476,7 @@ typedef struct VP9_COMP { TOKENEXTRA *tile_tok[4][1 << 6]; uint32_t tok_count[4][1 << 6]; + TOKENLIST *tplist[4][1 << 6]; // Ambient reconstruction err target for force key frames int64_t ambient_err; @@ -777,6 +784,20 @@ static INLINE int allocated_tokens(TileInfo tile) { return get_token_alloc(tile_mb_rows, tile_mb_cols); } +static INLINE void get_start_tok(VP9_COMP *cpi, int tile_row, int tile_col, + int mi_row, TOKENEXTRA **tok) { + VP9_COMMON *const cm = &cpi->common; + const int tile_cols = 1 << cm->log2_tile_cols; + TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; + const TileInfo *const tile_info = &this_tile->tile_info; + + int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; + const int mb_row = (mi_row - tile_info->mi_row_start) >> 1; + + *tok = + cpi->tile_tok[tile_row][tile_col] + get_token_alloc(mb_row, tile_mb_cols); +} + int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); #if CONFIG_VP9_HIGHBITDEPTH int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, -- 2.40.0