From b4b09c97964d41134af2d58c48ae13f72ab40719 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Wed, 6 Aug 2014 22:48:37 -0700 Subject: [PATCH] Enable fast forward txfm and quant for rate-distortion search This commit enables the encoder to select a fast forward transform and quantization path according to the prediction residual sse/variance, in the rate-distortion optimization scheme. Change-Id: Ief9fc3844fd4107166d401970e800c6e5ce2b5fe --- vp9/encoder/vp9_block.h | 3 +++ vp9/encoder/vp9_quantize.c | 33 ++++++++++++++---------- vp9/encoder/vp9_rdopt.c | 51 +++++++++++++++++++++++++++++++++----- 3 files changed, 68 insertions(+), 19 deletions(-) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 39dfb7e1d..3e80eb52c 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -41,6 +41,7 @@ struct macroblock_plane { int16_t *zbin; int16_t *round; + int64_t quant_thred[2]; // Zbin Over Quant value int16_t zbin_extra; }; @@ -117,6 +118,8 @@ struct macroblock { // skip forward transform and quantization int skip_txfm[MAX_MB_PLANE]; + int64_t bsse[MAX_MB_PLANE]; + // Used to store sub partition's choices. 
MV pred_mv[MAX_REF_FRAMES]; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index e153b2077..eababdbca 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -23,15 +23,14 @@ void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { - int eob = -1; + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int tmp, eob = -1; if (!skip_block) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 16; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr; @@ -45,15 +44,15 @@ void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { - int eob = -1; + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int tmp, eob = -1; if (!skip_block) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; @@ -354,6 +353,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { 
x->plane[0].quant_shift = quants->y_quant_shift[qindex]; x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; + x->plane[0].quant_thred[0] = cm->y_dequant[qindex][0] * + cm->y_dequant[qindex][0]; + x->plane[0].quant_thred[1] = cm->y_dequant[qindex][1] * + cm->y_dequant[qindex][1]; x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; @@ -365,6 +368,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; + x->plane[i].quant_thred[0] = cm->uv_dequant[qindex][0] * + cm->uv_dequant[qindex][0]; + x->plane[i].quant_thred[1] = cm->uv_dequant[qindex][1] * + cm->uv_dequant[qindex][1]; x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 177066b32..35ba85d74 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -171,15 +171,27 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int64_t dist_sum = 0; const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; + const int shift = 8; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, &sse); + const unsigned int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, + &sse); + if (!x->select_tx_size) { + if (sse < p->quant_thred[0] >> shift) + x->skip_txfm[i] = 1; + else if (var < p->quant_thred[1] >> shift) + x->skip_txfm[i] = 2; + else + x->skip_txfm[i] = 0; + } + + x->bsse[i] = sse; if (i == 0) x->pred_sse[ref] = sse; @@ -357,12 +369,32 @@ static 
void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (args->skip) return; - if (!is_inter_block(mbmi)) + if (!is_inter_block(mbmi)) { vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip); - else - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + dist_block(plane, block, tx_size, args); + } else { + if (x->skip_txfm[plane] == 0) { + // full forward transform and quantization + vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + dist_block(plane, block, tx_size, args); + } else if (x->skip_txfm[plane] == 2) { + // compute DC coefficient; adjust dist only if it quantizes to non-zero + int16_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); + int16_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); + vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + args->sse = x->bsse[plane] << 4; + args->dist = args->sse; + if (x->plane[plane].eobs[block]) + args->dist = args->sse - ((coeff[0] * coeff[0] - + (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2); + } else { + // skip forward transform + x->plane[plane].eobs[block] = 0; + args->sse = x->bsse[plane] << 4; + args->dist = args->sse; + } + } - dist_block(plane, block, tx_size, args); rate_block(plane, block, plane_bsize, tx_size, args); rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse); @@ -2103,6 +2135,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; INTERP_FILTER best_filter = SWITCHABLE; + int skip_txfm[MAX_MB_PLANE] = {0}; + int64_t bsse[MAX_MB_PLANE] = {0}; int bsl = mi_width_log2_lookup[bsize]; int pred_filter_search = cpi->sf.cb_pred_filter_search ? 
@@ -2265,6 +2299,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_filter = mbmi->interp_filter; if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; + vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); + vpx_memcpy(bsse, x->bsse, sizeof(bsse)); } if ((cm->interp_filter == SWITCHABLE && newbest) || @@ -2317,6 +2353,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, disable_skip, this_rd); } + vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); + vpx_memcpy(x->bsse, bsse, sizeof(bsse)); + if (!x->skip) { int skippable_y, skippable_uv; int64_t sseuv = INT64_MAX; -- 2.40.0