From 7bea8c59f934aa8bfab43935b2355b88adaa12f0 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Wed, 29 Oct 2014 16:37:16 -0700 Subject: [PATCH] Rework pred pixel buffer system in non-RD coding mode This commit reworks the inter prediction buffer system to support hybrid partition search. It reduces the runtime of speed -5 by about 3%. No compression performance change. vidyo1 720p 1000 kbps 11831 ms -> 11497 ms nik 720p 1000 kbps 10919 ms -> 10645 ms Change-Id: I5b2da747c6395c253cd074d3907f5402e1840c36 --- vp9/encoder/vp9_encodeframe.c | 51 ++++++++++++++++++++++++++--------- vp9/encoder/vp9_pickmode.c | 20 +++++++------- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 18a091220..bdd501f8c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2678,6 +2678,22 @@ static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, } } +// Reset the prediction pixel ready flag recursively. 
+static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { + pc_tree->none.pred_pixel_ready = 0; + pc_tree->horizontal[0].pred_pixel_ready = 0; + pc_tree->horizontal[1].pred_pixel_ready = 0; + pc_tree->vertical[0].pred_pixel_ready = 0; + pc_tree->vertical[1].pred_pixel_ready = 0; + + if (bsize > BLOCK_8X8) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); + int i; + for (i = 0; i < 4; ++i) + pred_pixel_ready_reset(pc_tree->split[i], subsize); + } +} + static void nonrd_pick_partition(VP9_COMP *cpi, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, @@ -2736,6 +2752,10 @@ static void nonrd_pick_partition(VP9_COMP *cpi, partition_vert_allowed &= force_vert_split; } + ctx->pred_pixel_ready = !(partition_vert_allowed || + partition_horz_allowed || + do_split); + // PARTITION_NONE if (partition_none_allowed) { nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, @@ -2743,7 +2763,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ctx->mic.mbmi = xd->mi[0].src_mi->mbmi; ctx->skip_txfm[0] = x->skip_txfm[0]; ctx->skip = x->skip; - ctx->pred_pixel_ready = 0; if (this_rdc.rate != INT_MAX) { int pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -2819,17 +2838,17 @@ static void nonrd_pick_partition(VP9_COMP *cpi, subsize = get_subsize(bsize, PARTITION_HORZ); if (sf->adaptive_motion_search) load_pred_mv(x, ctx); - + pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; - pc_tree->horizontal[0].pred_pixel_ready = 0; if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) { load_pred_mv(x, ctx); + pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row + ms, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1]); @@ -2837,7 +2856,6 @@ static void 
nonrd_pick_partition(VP9_COMP *cpi, pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; - pc_tree->horizontal[1].pred_pixel_ready = 0; if (this_rdc.rate == INT_MAX) { vp9_rd_cost_reset(&sum_rdc); @@ -2854,32 +2872,32 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_HORZ; + } else { + pred_pixel_ready_reset(pc_tree, bsize); } } // PARTITION_VERT if (partition_vert_allowed && do_rect) { subsize = get_subsize(bsize, PARTITION_VERT); - if (sf->adaptive_motion_search) load_pred_mv(x, ctx); - + pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; - pc_tree->vertical[0].pred_pixel_ready = 0; if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) { load_pred_mv(x, ctx); + pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + ms, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; - pc_tree->vertical[1].pred_pixel_ready = 0; if (this_rdc.rate == INT_MAX) { vp9_rd_cost_reset(&sum_rdc); @@ -2896,6 +2914,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_VERT; + } else { + pred_pixel_ready_reset(pc_tree, bsize); } } @@ -2977,27 +2997,27 @@ static void nonrd_select_partition(VP9_COMP *cpi, } else { switch (partition) { case PARTITION_NONE: + pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); 
pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; - pc_tree->none.pred_pixel_ready = 1; break; case PARTITION_VERT: + pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; - pc_tree->vertical[0].pred_pixel_ready = 1; if (mi_col + hbs < cm->mi_cols) { + pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; - pc_tree->vertical[1].pred_pixel_ready = 1; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { rd_cost->rate += this_rdc.rate; @@ -3006,19 +3026,19 @@ static void nonrd_select_partition(VP9_COMP *cpi, } break; case PARTITION_HORZ: + pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; - pc_tree->horizontal[0].pred_pixel_ready = 1; if (mi_row + hbs < cm->mi_rows) { + pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; - pc_tree->horizontal[1].pred_pixel_ready = 1; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { 
rd_cost->rate += this_rdc.rate; @@ -3096,6 +3116,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, switch (partition) { case PARTITION_NONE: + pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; @@ -3103,12 +3124,14 @@ static void nonrd_use_partition(VP9_COMP *cpi, pc_tree->none.skip = x->skip; break; case PARTITION_VERT: + pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; if (mi_col + hbs < cm->mi_cols) { + pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; @@ -3122,12 +3145,14 @@ static void nonrd_use_partition(VP9_COMP *cpi, } break; case PARTITION_HORZ: + pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; if (mi_row + hbs < cm->mi_rows) { + pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 6928338ee..13564c347 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -515,8 +515,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, PRED_BUFFER *best_pred = NULL; PRED_BUFFER *this_mode_pred = NULL; const int pixels_in_block = bh * bw; + int reuse_inter_pred = 
cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready; - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { int i; for (i = 0; i < 3; i++) { #if CONFIG_VP9_HIGHBITDEPTH @@ -639,7 +640,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for the best prediction filter type, when the resulting // motion vector is at sub-pixel accuracy level for luma component, i.e., // the last three bits are all zeros. - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { if (!this_mode_pred) { this_mode_pred = &tmp[3]; } else { @@ -677,7 +678,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_cost = cost; skip_txfm = x->skip_txfm[0]; - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { if (this_mode_pred != current_pred) { free_pred_buffer(this_mode_pred); this_mode_pred = current_pred; @@ -692,7 +693,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred) + if (reuse_inter_pred && this_mode_pred != current_pred) free_pred_buffer(current_pred); mbmi->interp_filter = best_filter; @@ -744,13 +745,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_ref_frame = ref_frame; skip_txfm = x->skip_txfm[0]; - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { free_pred_buffer(best_pred); - best_pred = this_mode_pred; } } else { - if (cpi->sf.reuse_inter_pred_sby) + if (reuse_inter_pred) free_pred_buffer(this_mode_pred); } @@ -764,7 +764,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // If best prediction is not in dst buf, then copy the prediction block from // temp buf to dst buf. 
- if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby && + if (best_pred != NULL && reuse_inter_pred && best_pred->data != orig_dst.buf) { pd->dst = orig_dst; #if CONFIG_VP9_HIGHBITDEPTH @@ -799,7 +799,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - if (cpi->sf.reuse_inter_pred_sby) { + if (reuse_inter_pred) { pd->dst.buf = tmp[0].data; pd->dst.stride = bw; } @@ -831,7 +831,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->skip_txfm[0] = skip_txfm; } } - if (cpi->sf.reuse_inter_pred_sby) + if (reuse_inter_pred) pd->dst = orig_dst; } -- 2.40.0