From b91b146d1df46a3f3bbd544548839643cae662c1 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Wed, 7 May 2014 10:39:00 -0700 Subject: [PATCH] Add static-threshold skipping in non-rd mode Added a skipping test in non-rd inter-mode. After interpolation prediction step, the residuals are tested to see if they will be quantized to 0 based on modeling between spatial domain and frequency domain. Set static-thresh to 800 for >=720p and 300 for <720p, rtc set tests showed 1. Speed 5, psnr: -0.514%; ssim: -1.748%; speedup on related clips: 5% -11% 2. Speed 6, psnr: -0.628%; ssim: -1.637%; speedup on related clips: 4% - 9% Change-Id: I62fbf26bc043ecd2b584f255f1a4ee5ab52bfcf3 --- vp9/encoder/vp9_encoder.h | 4 +- vp9/encoder/vp9_pickmode.c | 101 ++++++++++++++++++++++++++++--- vp9/encoder/vp9_speed_features.c | 10 --- vp9/encoder/vp9_speed_features.h | 4 -- 4 files changed, 95 insertions(+), 24 deletions(-) diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 6ffbd3f19..e89811356 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -411,8 +411,8 @@ typedef struct VP9_COMP { // Default value is 1. From first pass stats, encode_breakout may be disabled. ENCODE_BREAKOUT_TYPE allow_encode_breakout; - // Get threshold from external input. In real time mode, it can be - // overwritten according to encoding speed. + // Get threshold from external input. A suggested threshold is 800 for HD + // clips, and 300 for < HD clips. 
int encode_breakout; unsigned char *segmentation_map; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index d5379f6bc..3877e6612 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -148,7 +148,8 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int *out_rate_sum, int64_t *out_dist_sum) { + int *out_rate_sum, int64_t *out_dist_sum, + unsigned int *var_y, unsigned int *sse_y) { // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. @@ -162,6 +163,9 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); + *var_y = var; + *sse_y = sse; + // TODO(jingning) This is a temporary solution to account for frames with // light changes. Need to customize the rate-distortion modeling for non-RD // mode decision. 
@@ -198,6 +202,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int rate = INT_MAX; int64_t dist = INT64_MAX; + // var_y and sse_y are saved to be used in skipping checking + unsigned int var_y = UINT_MAX; + unsigned int sse_y = UINT_MAX; VP9_COMMON *cm = &cpi->common; int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); @@ -219,8 +226,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; - if (!x->in_active_map) - x->skip = 1; + // initialize mode decisions *returnrate = INT_MAX; *returndistortion = INT64_MAX; @@ -320,6 +326,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (mbmi->mv[0].as_mv.col & 0x07) != 0)) { int pf_rate[3]; int64_t pf_dist[3]; + unsigned int pf_var[3]; + unsigned int pf_sse[3]; int64_t best_cost = INT64_MAX; INTERP_FILTER best_filter = SWITCHABLE, filter; @@ -328,7 +336,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = filter; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], - &pf_dist[filter]); + &pf_dist[filter], &pf_var[filter], &pf_sse[filter]); cost = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) + pf_rate[filter], pf_dist[filter]); @@ -341,10 +349,12 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = best_filter; rate = pf_rate[mbmi->interp_filter]; dist = pf_dist[mbmi->interp_filter]; + var_y = pf_var[mbmi->interp_filter]; + sse_y = pf_sse[mbmi->interp_filter]; } else { mbmi->interp_filter = (filter_ref == SWITCHABLE) ? 
EIGHTTAP: filter_ref; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); } rate += rate_mv; @@ -352,7 +362,78 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, [INTER_OFFSET(this_mode)]; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); - if (this_rd < best_rd) { + // Skipping checking: test to see if this block can be reconstructed by + // prediction only. + if (!x->in_active_map) { + x->skip = 1; + } else if (cpi->allow_encode_breakout && x->encode_breakout) { + const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); + unsigned int var = var_y, sse = sse_y; + // Skipping threshold for ac. + unsigned int thresh_ac; + // Skipping threshold for dc. + unsigned int thresh_dc; + // Set a maximum for threshold to avoid big PSNR loss in low bit rate + // case. Use extreme low threshold for static frames to limit skipping. + const unsigned int max_thresh = 36000; + // The encode_breakout input + const unsigned int min_thresh = + MIN(((unsigned int)x->encode_breakout << 4), max_thresh); + + // Calculate threshold according to dequant value. + thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; + thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); + + // Adjust ac threshold according to partition size. + thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); + + // Y skipping condition checking for ac and dc. + if (var <= thresh_ac && (sse - var) <= thresh_dc) { + unsigned int sse_u, sse_v; + unsigned int var_u, var_v; + + // Skip u v prediction for less calculation, that won't affect + // result much. 
+ var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, + x->plane[1].src.stride, + xd->plane[1].dst.buf, + xd->plane[1].dst.stride, &sse_u); + + // U skipping condition checking + if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) { + var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, + x->plane[2].src.stride, + xd->plane[2].dst.buf, + xd->plane[2].dst.stride, &sse_v); + + // V skipping condition checking + if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) { + x->skip = 1; + + // The cost of skip bit needs to be added. + rate = rate_mv; + rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + + // More on this part of rate + // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + + // Scaling factor for SSE from spatial domain to frequency + // domain is 16. Adjust distortion accordingly. + // TODO(yunqingwang): In this function, only y-plane dist is + // calculated. + dist = (sse << 4); // + ((sse_u + sse_v) << 4); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + // *disable_skip = 1; + } + } + } + } + + if (this_rd < best_rd || x->skip) { best_rd = this_rd; *returnrate = rate; *returndistortion = dist; @@ -360,6 +441,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; } + + if (x->skip) + break; } } @@ -371,14 +455,15 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Perform intra prediction search, if the best SAD is above a certain // threshold. 
- if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) { + if (!x->skip && best_rd > inter_mode_thresh && + bsize < cpi->sf.max_intra_bsize) { for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { vp9_predict_intra_block(xd, 0, b_width_log2(bsize), mbmi->tx_size, this_mode, &p->src.buf[0], p->src.stride, &pd->dst.buf[0], pd->dst.stride, 0, 0, 0); - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); rate += cpi->mbmode_cost[this_mode]; rate += intra_cost_penalty; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 93e23eee2..7b2d1e2f0 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -146,7 +146,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { sf->static_segmentation = 0; sf->adaptive_rd_thresh = 1; - sf->encode_breakout_thresh = 1; sf->use_fast_coef_costing = 1; if (speed == 1) { @@ -169,7 +168,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 8; } if (speed >= 2) { @@ -208,7 +206,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 200; } if (speed >= 3) { @@ -226,7 +223,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->optimize_coefficients = 0; sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->lpf_pick = LPF_PICK_FROM_Q; - sf->encode_breakout_thresh = 700; } if (speed >= 4) { @@ -245,7 +241,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, } 
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; sf->frame_parameter_update = 0; - sf->encode_breakout_thresh = 1000; sf->search_method = FAST_HEX; sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); @@ -338,7 +333,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_fast_coef_costing = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set sf->use_nonrd_pick_mode = 0; - sf->encode_breakout_thresh = 0; for (i = 0; i < BLOCK_SIZES; ++i) sf->disable_inter_mode_mask[i] = 0; sf->max_intra_bsize = BLOCK_64X64; @@ -384,10 +378,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1; - if (cpi->encode_breakout && oxcf->mode == REALTIME && - sf->encode_breakout_thresh > cpi->encode_breakout) - cpi->encode_breakout = sf->encode_breakout_thresh; - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) sf->adaptive_pred_interp_filter = 0; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 46806c9a9..d8c1a8be2 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -321,10 +321,6 @@ typedef struct SPEED_FEATURES { // This flag controls the use of non-RD mode decision. int use_nonrd_pick_mode; - // This variable sets the encode_breakout threshold. Currently, it is only - // enabled in real time mode. - int encode_breakout_thresh; - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. int disable_inter_mode_mask[BLOCK_SIZES]; -- 2.40.0