From c4978abc07d04884ec5aeb204e946a9d170bee92 Mon Sep 17 00:00:00 2001 From: sdeng Date: Wed, 24 Oct 2018 16:23:24 -0700 Subject: [PATCH] Enable 10 bit tpl support lowres_bd10 midres_bd10 avg_psnr -0.897 -1.261 ovr_psnr -0.975 -1.349 Change-Id: Id54f2c419f4edaa91e89ffea52b4038b1d94e563 --- vp9/encoder/vp9_encoder.c | 51 ++++++++++++++++++++++++++++---- vp9/encoder/vp9_speed_features.c | 5 ---- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index c189fbd2c..56a423e6c 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -5773,9 +5773,21 @@ void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff, int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; const int shift = tx_size == TX_32X32 ? 0 : 2; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, + p->quant_fp, qcoeff, dqcoeff, pd->dequant, + &eob, scan_order->scan, scan_order->iscan); + } else { + vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, + p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, + scan_order->scan, scan_order->iscan); + } +#else vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan, scan_order->iscan); +#endif // CONFIG_VP9_HIGHBITDEPTH *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift; *recon_error = VPXMAX(*recon_error, 1); @@ -5784,6 +5796,19 @@ void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff, *sse = VPXMAX(*sse, 1); } +#if CONFIG_VP9_HIGHBITDEPTH +void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, + TX_SIZE tx_size) { + // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms. + switch (tx_size) { + case TX_8X8: vpx_hadamard_8x8_c(src_diff, bw, coeff); break; + case TX_16X16: vpx_hadamard_16x16_c(src_diff, bw, coeff); break; + case TX_32X32: vpx_hadamard_32x32_c(src_diff, bw, coeff); break; + default: assert(0); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, TX_SIZE tx_size) { switch (tx_size) { @@ -5883,11 +5908,24 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src, src_stride, dst, dst_stride, 0, 0, 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, + dst_stride, xd->bd); + highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); + // TODO(sdeng): Implement SIMD based high bit-depth satd. + intra_cost = vpx_satd_c(coeff, pix_num); + } else { + vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, + dst_stride); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + intra_cost = vpx_satd(coeff, pix_num); + } +#else vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); - wht_fwd_txfm(src_diff, bw, coeff, tx_size); - intra_cost = vpx_satd(coeff, pix_num); +#endif // CONFIG_VP9_HIGHBITDEPTH if (intra_cost < best_intra_cost) best_intra_cost = intra_cost; } @@ -5911,8 +5949,6 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, mi_row, mi_col, &mv.as_mv); #endif - // TODO(jingning): Not yet support high bit-depth in the next three - // steps. #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor( @@ -5923,6 +5959,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_highbd_subtract_block( bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw, xd->bd); + highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); + inter_cost = vpx_satd_c(coeff, pix_num); } else { vp9_build_inter_predictor( ref_frame[rf_idx]->y_buffer + mb_y_offset, @@ -5931,6 +5969,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_subtract_block(bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + inter_cost = vpx_satd(coeff, pix_num); } #else vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset, @@ -5940,10 +5980,9 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_subtract_block(bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw); -#endif wht_fwd_txfm(src_diff, bw, coeff, tx_size); - inter_cost = vpx_satd(coeff, pix_num); +#endif #if CONFIG_NON_GREEDY_MV tpl_stats->inter_cost_arr[rf_idx] = inter_cost; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index db064d3df..1f9044265 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -902,12 +902,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = 99.0; sf->allow_acl = 1; -#if CONFIG_VP9_HIGHBITDEPTH - // TODO(jingning): Make the model support high bit-depth route. - sf->enable_tpl_model = !cm->use_highbitdepth && oxcf->enable_tpl_model; -#else sf->enable_tpl_model = oxcf->enable_tpl_model; -#endif sf->prune_ref_frame_for_rect_partitions = 0; for (i = 0; i < TX_SIZES; i++) { -- 2.40.0