From: Hui Su
Date: Tue, 4 Sep 2018 21:44:02 +0000 (-0700)
Subject: Enable rectangular partition search for speed 1
X-Git-Tag: v1.8.0~338^2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9bd2bde10dfbe65224a73fd422a49a6c8e6ba078;p=libvpx

Enable rectangular partition search for speed 1

This patch enables rectangular partition search on speed 1. The encoding speed loss is reduced thanks to recently added speed features.

This only affects speed 1 low bit-depth encoding.

Coding gains:
          avg_psnr  ovr_psnr  ssim
lowres    0.577%    0.621%    0.665%
midres    1.147%    1.215%    1.148%
hdres     0.758%    0.790%    0.769%

Tested encoding speed on 15 midres and 15 hdres clips, average speed loss:
          QP=30     QP=40     QP=50
midres    4.43%     3.72%     -1.05%
hdres     4.41%     5.65%     3.77%

Change-Id: Ifc0712becccc69f7498796359ff12dbfa63fd7b3
---
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 374ba1a0d..488f04d98 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3546,7 +3546,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (cpi->sf.use_square_partition_only && - bsize > cpi->sf.use_square_only_threshold) { + (bsize > cpi->sf.use_square_only_thresh_high || + bsize < cpi->sf.use_square_only_thresh_low)) { if (cpi->use_svc) { if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) partition_horz_allowed &= force_horz_split; @@ -3839,9 +3840,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } else { // skip rectangular partition test when larger block size // gives better rd cost - if ((cpi->sf.less_rectangular_check) && - ((bsize > cpi->sf.use_square_only_threshold) || - (best_rdc.dist < dist_breakout_thr))) + if (cpi->sf.less_rectangular_check && + (bsize > cpi->sf.use_square_only_thresh_high || + best_rdc.dist < dist_breakout_thr)) do_rect &= !partition_none_allowed; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -3921,8 +3922,8 @@ static void rd_pick_partition(VP9_COMP *cpi, 
ThreadData *td, best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_HORZ; - if ((cpi->sf.less_rectangular_check) && - (bsize > cpi->sf.use_square_only_threshold)) + if (cpi->sf.less_rectangular_check && + bsize > cpi->sf.use_square_only_thresh_high) do_rect = 0; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 118d0bd34..b576b1e4b 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -70,14 +70,15 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, // speed 0 features sf->partition_search_breakout_thr.dist = (1 << 20); sf->partition_search_breakout_thr.rate = 80; - sf->use_square_only_threshold = BLOCK_SIZES; + sf->use_square_only_thresh_high = BLOCK_SIZES; + sf->use_square_only_thresh_low = BLOCK_4X4; if (is_480p_or_larger) { // Currently, the machine-learning based partition search early termination // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. sf->ml_partition_search_early_termination = 1; } else { - sf->use_square_only_threshold = BLOCK_32X32; + sf->use_square_only_thresh_high = BLOCK_32X32; } if (!is_1080p_or_larger) { @@ -95,29 +96,49 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, if (speed >= 1) { sf->ml_partition_search_early_termination = 0; - sf->use_square_only_threshold = BLOCK_4X4; - + sf->use_ml_partition_search_breakout = 1; + if (is_480p_or_larger) + sf->use_square_only_thresh_high = BLOCK_64X64; + else + sf->use_square_only_thresh_high = BLOCK_32X32; + sf->use_square_only_thresh_low = BLOCK_16X16; if (is_720p_or_larger) { sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - sf->partition_search_breakout_thr.dist = (1 << 23); - sf->use_ml_partition_search_breakout = 0; + sf->partition_search_breakout_thr.dist = (1 << 22); + sf->ml_partition_search_breakout_thresh[0] = -5.0f; + sf->ml_partition_search_breakout_thresh[1] = -5.0f; + sf->ml_partition_search_breakout_thresh[2] = -9.0f; } else { sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; sf->partition_search_breakout_thr.dist = (1 << 21); - sf->ml_partition_search_breakout_thresh[0] = 0.0f; - sf->ml_partition_search_breakout_thresh[1] = 0.0f; - sf->ml_partition_search_breakout_thresh[2] = 0.0f; + sf->ml_partition_search_breakout_thresh[0] = -1.0f; + sf->ml_partition_search_breakout_thresh[1] = -1.0f; + sf->ml_partition_search_breakout_thresh[2] = -1.0f; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) { + sf->use_square_only_thresh_high = BLOCK_4X4; + sf->use_square_only_thresh_low = BLOCK_SIZES; + if (is_720p_or_larger) { + sf->partition_search_breakout_thr.dist = (1 << 23); + sf->use_ml_partition_search_breakout = 0; + } } +#endif } if (speed >= 2) { + sf->use_square_only_thresh_high = BLOCK_4X4; + sf->use_square_only_thresh_low = BLOCK_SIZES; if (is_720p_or_larger) { sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->adaptive_pred_interp_filter = 0; sf->partition_search_breakout_thr.dist = (1 << 24); sf->partition_search_breakout_thr.rate = 120; + sf->use_ml_partition_search_breakout = 0; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->partition_search_breakout_thr.dist = (1 << 22); @@ -220,12 +241,13 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, if (speed >= 1) { sf->enable_tpl_model = 0; - sf->prune_ref_frame_for_rect_partitions = 0; - - sf->ml_prune_rect_partition_threhold[0] = -1; - sf->ml_prune_rect_partition_threhold[1] = -1; - sf->ml_prune_rect_partition_threhold[2] = -1; - sf->ml_prune_rect_partition_threhold[3] = -1; + sf->ml_prune_rect_partition_threhold[1] = 200; + sf->ml_prune_rect_partition_threhold[2] = 200; + sf->ml_prune_rect_partition_threhold[3] = 200; +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) + sf->prune_ref_frame_for_rect_partitions = 0; +#endif // CONFIG_VP9_HIGHBITDEPTH if (oxcf->pass == 2) { TWO_PASS *const twopass = &cpi->twopass; @@ -289,6 +311,10 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->recode_tolerance_low = 15; sf->recode_tolerance_high = 45; sf->enhanced_full_pixel_motion_search = 0; + sf->prune_ref_frame_for_rect_partitions = 0; + sf->ml_prune_rect_partition_threhold[1] = -1; + sf->ml_prune_rect_partition_threhold[2] = -1; + sf->ml_prune_rect_partition_threhold[3] = -1; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { for (i = 0; i < MAX_MESH_STEP; ++i) { @@ -839,7 +865,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->partition_search_type = SEARCH_PARTITION; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; - sf->use_square_only_threshold = BLOCK_SIZES; + sf->use_square_only_thresh_high = BLOCK_SIZES; + sf->use_square_only_thresh_low = BLOCK_4X4; sf->auto_min_max_partition_size = NOT_IN_USE; sf->rd_auto_partition_min_limit = 
BLOCK_4X4; sf->default_max_partition_size = BLOCK_64X64; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index bc468c0f4..2b91407ad 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -331,14 +331,19 @@ typedef struct SPEED_FEATURES { // rd than partition type split. int less_rectangular_check; - // Disable testing non square partitions. (eg 16x32) + // Disable testing non square partitions(eg 16x32) for block sizes larger than + // use_square_only_thresh_high or smaller than use_square_only_thresh_low. int use_square_partition_only; - BLOCK_SIZE use_square_only_threshold; + BLOCK_SIZE use_square_only_thresh_high; + BLOCK_SIZE use_square_only_thresh_low; // Prune reference frames for rectangular partitions. int prune_ref_frame_for_rect_partitions; // Threshold values used for ML based rectangular partition search pruning. + // If < 0, the feature is turned off. + // Higher values mean more aggressiveness to skip rectangular partition + // search that results in better encoding speed but worse coding performance. int ml_prune_rect_partition_threhold[4]; // Sets min and max partition sizes for this 64x64 region based on the @@ -497,6 +502,8 @@ typedef struct SPEED_FEATURES { // Use ML-based partition search early breakout. int use_ml_partition_search_breakout; + // Higher values mean more aggressiveness for partition search breakout that + // results in better encoding speed but worse compression performance. float ml_partition_search_breakout_thresh[3]; // Machine-learning based partition search early termination