From: Jingning Han Date: Fri, 17 Oct 2014 15:58:28 +0000 (-0700) Subject: Hybrid partition search for rtc coding mode X-Git-Tag: v1.4.0~584^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9f128b3ed9fc2f431444f7cea238a288fb0e470c;p=libvpx Hybrid partition search for rtc coding mode This commit re-designs the recursive partition search scheme in rtc speed -5. It first checks if the current block is under cyclic refresh mode. If so, apply recursive partition search. Otherwise, perform sub-sampled pixel based partition selection. When the pre-selection finds the partition size should be 32x32 or above, use the partition size directly. Otherwise, apply partition search at nearby levels around the preset partition size. It is enabled in speed -5. The compression performance of rtc speed -5 is improved by 9.4%. Speed wise, the run-time goes slower from 1% to 10%. nik_720p, 1000 kbps 33220 b/f, 38.977 dB, 10109 ms -> 33200 b/f, 39.119 dB, 10210 ms vidyo1_720p, 1000 kbps 16536 b/f, 40.495 dB, 10119 ms -> 16536 b/f, 40.827 dB, 11287 ms Change-Id: I65adba352e3adc03bae50854ddaea1b421653c6c --- diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 6b28ee591..47d9580a8 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -34,6 +34,7 @@ typedef struct { int is_coded; int num_4x4_blk; int skip; + int pred_pixel_ready; // For current partition, only if all Y, U, and V transform blocks' // coefficients are quantized to 0, skippable is set to 0. int skippable; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 6eff8c501..9436b0adc 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -813,6 +813,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, } ctx->is_coded = 0; ctx->skippable = 0; + ctx->pred_pixel_ready = 0; x->skip_recode = 0; // Set to zero to make sure we do not use the previous encoded frame stats @@ -2837,6 +2838,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ctx->mic.mbmi = xd->mi[0].src_mi->mbmi; ctx->skip_txfm[0] = x->skip_txfm[0]; ctx->skip = x->skip; + ctx->pred_pixel_ready = 0; if (this_rate != INT_MAX) { int pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -2922,6 +2924,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; + pc_tree->horizontal[0].pred_pixel_ready = 0; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); @@ -2934,6 +2937,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; + pc_tree->horizontal[1].pred_pixel_ready = 0; if (this_rate == INT_MAX) { sum_rd = INT64_MAX; @@ -2966,6 +2970,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; + pc_tree->vertical[0].pred_pixel_ready = 0; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { load_pred_mv(x, ctx); @@ -2975,6 +2980,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; + pc_tree->vertical[1].pred_pixel_ready = 0; if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { @@ -3035,6 +3041,138 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } +static void nonrd_select_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO *mi, + TOKENEXTRA **tp, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int output_enabled, + int *totrate, int64_t *totdist, + PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + const int mis = cm->mi_stride; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + int rate = INT_MAX; + int64_t dist = INT64_MAX; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + subsize = (bsize >= BLOCK_8X8) ? mi[0].src_mi->mbmi.sb_type : BLOCK_4X4; + partition = partition_lookup[bsl][subsize]; + + if (bsize == BLOCK_32X32 && partition != PARTITION_NONE && + subsize >= BLOCK_16X16) { + cpi->sf.max_partition_size = BLOCK_32X32; + cpi->sf.min_partition_size = BLOCK_8X8; + nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, bsize, + totrate, totdist, 0, INT64_MAX, pc_tree); + } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) { + cpi->sf.max_partition_size = BLOCK_16X16; + cpi->sf.min_partition_size = BLOCK_8X8; + nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, bsize, + totrate, totdist, 0, INT64_MAX, pc_tree); + } else { + switch (partition) { + case PARTITION_NONE: + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, + subsize, &pc_tree->none); + pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; + pc_tree->none.skip = x->skip; + pc_tree->none.pred_pixel_ready = 1; + break; + case PARTITION_VERT: + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, + subsize, &pc_tree->vertical[0]); + pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; + pc_tree->vertical[0].skip = x->skip; + pc_tree->vertical[0].pred_pixel_ready = 1; + if (mi_col + hbs < cm->mi_cols) { + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs, + &rate, &dist, subsize, &pc_tree->vertical[1]); + pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; + pc_tree->vertical[1].skip = x->skip; + pc_tree->vertical[1].pred_pixel_ready = 1; + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + } + break; + case PARTITION_HORZ: + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, + subsize, &pc_tree->horizontal[0]); + pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; + pc_tree->horizontal[0].skip = x->skip; + pc_tree->horizontal[0].pred_pixel_ready = 1; + if (mi_row + hbs < cm->mi_rows) { + nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, + &rate, &dist, subsize, &pc_tree->horizontal[0]); + pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; + pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; + pc_tree->horizontal[1].skip = x->skip; + pc_tree->horizontal[1].pred_pixel_ready = 1; + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + } + break; + case PARTITION_SPLIT: + subsize = get_subsize(bsize, PARTITION_SPLIT); + nonrd_select_partition(cpi, tile, mi, tp, mi_row, mi_col, + subsize, output_enabled, totrate, totdist, + pc_tree->split[0]); + nonrd_select_partition(cpi, tile, mi + hbs, tp, + mi_row, mi_col + hbs, subsize, output_enabled, + &rate, &dist, pc_tree->split[1]); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + nonrd_select_partition(cpi, tile, mi + hbs * mis, tp, + mi_row + hbs, mi_col, subsize, output_enabled, + &rate, &dist, pc_tree->split[2]); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + nonrd_select_partition(cpi, tile, mi + hbs * mis + hbs, tp, + mi_row + hbs, mi_col + hbs, subsize, + output_enabled, &rate, &dist, pc_tree->split[3]); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + break; + default: + assert("Invalid partition type."); + break; + } + } + + if (bsize == BLOCK_64X64 && output_enabled) { + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + *totrate, *totdist); + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize, pc_tree); + } +} + + static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO *mi, @@ -3193,21 +3331,23 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, 1, &dummy_rate, &dummy_dist, cpi->pc_root); break; case REFERENCE_PARTITION: - if (sf->partition_check || - !(x->in_static_area = is_background(cpi, tile, mi_row, mi_col))) { - set_modeinfo_offsets(cm, xd, mi_row, mi_col); + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + x->in_static_area = is_background(cpi, tile, mi_row, mi_col); + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && + xd->mi[0].src_mi->mbmi.segment_id && x->in_static_area) { auto_partition_range(cpi, tile, mi_row, mi_col, &sf->min_partition_size, &sf->max_partition_size); nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, INT64_MAX, - cpi->pc_root); + &dummy_rate, &dummy_dist, 1, + INT64_MAX, cpi->pc_root); } else { choose_partitioning(cpi, tile, mi_row, mi_col); - nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, - BLOCK_64X64, 1, &dummy_rate, &dummy_dist, - cpi->pc_root); + nonrd_select_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist, cpi->pc_root); } + break; default: assert(0); @@ -3737,7 +3877,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } - if (!cpi->sf.reuse_inter_pred_sby || seg_skip) + if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 9e3ee2c94..b138a87ff 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -276,6 +276,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; + // This feature is only enabled when partition search is disabled. + sf->reuse_inter_pred_sby = 1; + if (MIN(cm->width, cm->height) >= 720) sf->partition_search_breakout_dist_thr = (1 << 25); else @@ -298,9 +301,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; - // This feature is only enabled when partition search is disabled. - sf->reuse_inter_pred_sby = 1; - // Increase mode checking threshold for NEWMV. sf->elevate_newmv_thresh = 1000;