From: Yunqing Wang Date: Mon, 27 Feb 2017 22:26:15 +0000 (-0800) Subject: Apply machine learning-based early termination in VP9 partition search X-Git-Tag: v1.7.0~636^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=670101439fe4a976fcacf997ff383b6cd6704596;p=libvpx Apply machine learning-based early termination in VP9 partition search This patch was based on Yang Xian's intern project code. Further modifications were done. 1. Moved machine-learning related parameters into the context structure. 2. Corrected the calculation of sum_eobs. 3. Removed unused parameters and calculations. 4. Made it work with multiple tiles. 5. Added a speed feature for the machine-learning based partition search early termination. 6. Re-organized the code. The patch was rebased to the top-of-tree. Borg test BDRATE result: 4k set: PSNR: +0.144%; SSIM: +0.043%; hdres set: PSNR: +0.149%; SSIM: +0.269%; midres set: PSNR: +0.127%; SSIM: +0.257%; Average speed gain result: 4k clips: 22%; hd clips: 23%; midres clips: 15%. Change-Id: I0220e93a8277e6a7ea4b2c34b605966e3b1584ac --- diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 86ba03d69..af1a93a00 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -71,6 +71,9 @@ typedef struct { // search loop MV pred_mv[MAX_REF_FRAMES]; INTERP_FILTER pred_interp_filter; + + // Used for the machine learning-based early termination + int sum_eobs; } PICK_MODE_CONTEXT; typedef struct PC_TREE { diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 2d530a716..df72667d4 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -52,6 +52,33 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +// Machine learning-based early termination parameters. 
+static const double train_mean[24] = { + 303501.697372, 3042630.372158, 24.694696, 1.392182, + 689.413511, 162.027012, 1.478213, 0.0, + 135382.260230, 912738.513263, 28.845217, 1.515230, + 544.158492, 131.807995, 1.436863, 0.0, + 43682.377587, 208131.711766, 28.084737, 1.356677, + 138.254122, 119.522553, 1.252322, 0.0 +}; + +static const double train_stdm[24] = { + 673689.212982, 5996652.516628, 0.024449, 1.989792, + 985.880847, 0.014638, 2.001898, 0.0, + 208798.775332, 1812548.443284, 0.018693, 1.838009, + 396.986910, 0.015657, 1.332541, 0.0, + 55888.847031, 448587.962714, 0.017900, 1.904776, + 98.652832, 0.016598, 1.320992, 0.0 +}; + +// Error tolerance: 0.01%-0.05%-0.1%. Eight entries per block size (64x64, 32x32, 16x16, in that order): seven feature weights followed by an additive bias. +static const double classifiers[24] = { + 0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863, + 0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134, + 0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700, + 0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211 +}; + // This is used as a reference when computing the source variance for the // purpose of activity masking. // Eventually this should be replaced by custom no-reference routines, @@ -2684,6 +2711,18 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, } #endif +// Per-transform-block callback: adds this block's eob, read from ctx->eobs_pbuf, to ctx->sum_eobs (arg is the PICK_MODE_CONTEXT). +static void accumulate_eobs(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + void *arg) { + PICK_MODE_CONTEXT *ctx = (PICK_MODE_CONTEXT *)arg; + (void)row; + (void)col; + (void)plane_bsize; + (void)tx_size; + ctx->sum_eobs += ctx->eobs_pbuf[plane][1][block]; +} + // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. 
@@ -2863,15 +2902,92 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - // If all y, u, v transform blocks in this partition are skippable, and - // the dist & rate are within the thresholds, the partition search is - // terminated for current branch of the partition search tree. - if (!x->e_mbd.lossless && ctx->skippable && - ((best_rdc.dist < (dist_breakout_thr >> 2)) || - (best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr))) { - do_split = 0; - do_rect = 0; + if (!cpi->sf.ml_partition_search_early_termination) { + // If all y, u, v transform blocks in this partition are skippable, + // and the dist & rate are within the thresholds, the partition search + // is terminated for current branch of the partition search tree. + if (!x->e_mbd.lossless && ctx->skippable && + ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr))) { + do_split = 0; + do_rect = 0; + } + } else { + // Currently, the machine-learning based partition search early + // termination is only used while bsize is 16x16, 32x32 or 64x64, + // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. 
+ if (ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { + const double *clf; + const double *mean; + const double *sd; + const int mag_mv = + abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); + const int left_in_image = !!xd->left_mi; + const int above_in_image = !!xd->above_mi; + MODE_INFO **prev_mi = + &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row]; + int above_par = 0; // above partitioning: 2 = smaller than bsize, 1 = same size, 0 = otherwise + int left_par = 0; // left partitioning, same encoding + int last_par = 0; // co-located previous-frame partitioning, same encoding + BLOCK_SIZE context_size; + double score; + int offset = 0; + + assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); + + ctx->sum_eobs = 0; + vp9_foreach_transformed_block_in_plane(xd, bsize, 0, + accumulate_eobs, ctx); + + if (above_in_image) { + context_size = xd->above_mi->sb_type; + if (context_size < bsize) + above_par = 2; + else if (context_size == bsize) + above_par = 1; + } + + if (left_in_image) { + context_size = xd->left_mi->sb_type; + if (context_size < bsize) + left_par = 2; + else if (context_size == bsize) + left_par = 1; + } + + if (prev_mi[0]) { // test the grid slot: the slot's address is never NULL + context_size = prev_mi[0]->sb_type; + if (context_size < bsize) + last_par = 2; + else if (context_size == bsize) + last_par = 1; + } + + if (bsize == BLOCK_64X64) + offset = 0; + else if (bsize == BLOCK_32X32) + offset = 8; + else if (bsize == BLOCK_16X16) + offset = 16; + + // Early termination score: weighted sum of normalized features plus bias clf[7]; a negative score stops the split and rect searches. + clf = &classifiers[offset]; + mean = &train_mean[offset]; + sd = &train_stdm[offset]; + score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) + + clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) + + clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) + + clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * + sd[3]) + + clf[4] * (((double)ctx->sum_eobs - mean[4]) / sd[4]) + + clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) + + clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7]; + if (score < 0) { + do_split = 0; + do_rect = 0; + } + } } #if CONFIG_FP_MB_STATS @@ 
-2984,7 +3100,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_SPLIT; // Rate and distortion based partition search termination clause. - if (!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || + if (!cpi->sf.ml_partition_search_early_termination && + !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || (best_rdc.dist < dist_breakout_thr && best_rdc.rate < rate_breakout_thr))) { do_rect = 0; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index dfe132cbd..859d4eac0 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -71,7 +71,15 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->partition_search_breakout_thr.dist = (1 << 20); sf->partition_search_breakout_thr.rate = 80; + // Currently, the machine-learning based partition search early termination + // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. + if (VPXMIN(cm->width, cm->height) >= 480) { + sf->ml_partition_search_early_termination = 1; + } + if (speed >= 1) { + sf->ml_partition_search_early_termination = 0; + if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; @@ -586,6 +594,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { // Some speed-up features even for best quality as minimal impact on quality. 
sf->partition_search_breakout_thr.dist = (1 << 19); sf->partition_search_breakout_thr.rate = 80; + sf->ml_partition_search_early_termination = 0; if (oxcf->mode == REALTIME) { set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index b04cd872c..96898eea3 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -449,6 +449,9 @@ typedef struct SPEED_FEATURES { // Partition search early breakout thresholds. PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr; + // Machine-learning based partition search early termination + int ml_partition_search_early_termination; + // Allow skipping partition search for still image frame int allow_partition_search_skip;