From: John Koleszar Date: Tue, 30 Apr 2013 18:29:27 +0000 (-0700) Subject: Create common vp9_encode_sb{,y} X-Git-Tag: v1.3.0~1106^2~68^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3f4e80634b7cfb5ab5be2be362d0fb92dc378f23;p=libvpx Create common vp9_encode_sb{,y} Creates a common encode (subtract, transform, quantize, optimize, inverse transform, reconstruct) function for all sb sizes, including the old 16x16 path. Change-Id: I964dff1ea7a0a5c378046a069ad83495f54df007 --- diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 9e77f8125..9c07b9cd6 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2242,7 +2242,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, mi_row, mi_col, xd->scale_factor, xd->scale_factor_uv); if (!x->skip) { - vp9_encode_inter16x16(cm, x, mi_row, mi_col); + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); + vp9_encode_sb(cm, x, BLOCK_SIZE_MB16X16); } else { vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); #if CONFIG_COMP_INTERINTRA_PRED @@ -2488,90 +2489,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, } else #endif if (!x->skip) { - vp9_subtract_sb(x, bsize); - - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - vp9_transform_sby_32x32(x, bsize); - vp9_quantize_sby_32x32(x, bsize); - if (bsize == BLOCK_SIZE_SB64X64) { - vp9_transform_sbuv_32x32(x, bsize); - vp9_quantize_sbuv_32x32(x, bsize); - } else { - vp9_transform_sbuv_16x16(x, bsize); - vp9_quantize_sbuv_16x16(x, bsize); - } - if (x->optimize) { - vp9_optimize_sby(cm, x, bsize); - if (bsize == BLOCK_SIZE_SB64X64) - vp9_optimize_sbuv(cm, x, bsize); - else - vp9_optimize_sbuv(cm, x, bsize); - } - vp9_inverse_transform_sby_32x32(xd, bsize); - if (bsize == BLOCK_SIZE_SB64X64) - vp9_inverse_transform_sbuv_32x32(xd, bsize); - else - vp9_inverse_transform_sbuv_16x16(xd, bsize); - break; - case TX_16X16: - vp9_transform_sby_16x16(x, bsize); - vp9_quantize_sby_16x16(x, bsize); - if (bsize >= BLOCK_SIZE_SB32X32) { - vp9_transform_sbuv_16x16(x, bsize); - vp9_quantize_sbuv_16x16(x, bsize); - } else { - vp9_transform_sbuv_8x8(x, bsize); - vp9_quantize_sbuv_8x8(x, bsize); - } - if (x->optimize) { - vp9_optimize_sby(cm, x, bsize); - if (bsize >= BLOCK_SIZE_SB32X32) - vp9_optimize_sbuv(cm, x, bsize); - else - vp9_optimize_sbuv(cm, x, bsize); - } - vp9_inverse_transform_sby_16x16(xd, bsize); - if (bsize >= BLOCK_SIZE_SB32X32) - vp9_inverse_transform_sbuv_16x16(xd, bsize); - else - vp9_inverse_transform_sbuv_8x8(xd, bsize); - break; - case TX_8X8: - vp9_transform_sby_8x8(x, bsize); - vp9_quantize_sby_8x8(x, bsize); - if (x->optimize) - vp9_optimize_sby(cm, x, bsize); - vp9_inverse_transform_sby_8x8(xd, bsize); - if (bsize >= BLOCK_SIZE_MB16X16) { - vp9_transform_sbuv_8x8(x, bsize); - vp9_quantize_sbuv_8x8(x, bsize); - if (x->optimize) - vp9_optimize_sbuv(cm, x, bsize); - vp9_inverse_transform_sbuv_8x8(xd, bsize); - } else { - vp9_transform_sbuv_4x4(x, bsize); - vp9_quantize_sbuv_4x4(x, bsize); - if (x->optimize) - vp9_optimize_sbuv(cm, x, bsize); - vp9_inverse_transform_sbuv_4x4(xd, bsize); - } - break; - case TX_4X4: - vp9_transform_sby_4x4(x, bsize); - vp9_transform_sbuv_4x4(x, bsize); - vp9_quantize_sby_4x4(x, bsize); - vp9_quantize_sbuv_4x4(x, bsize); - if (x->optimize) { - vp9_optimize_sby(cm, x, bsize); - vp9_optimize_sbuv(cm, x, bsize); - } - vp9_inverse_transform_sby_4x4(xd, bsize); - vp9_inverse_transform_sbuv_4x4(xd, bsize); - break; - default: assert(0); - } - vp9_recon_sb_c(xd, bsize); + vp9_encode_sb(cm, x, bsize); vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize); } else { // FIXME(rbultje): not tile-aware (mi - 1) diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 15fd4f1b6..5f00b7063 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -480,31 +480,33 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, *a = *l = (final_eob > 0); } -struct optimize_ctx { - ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; - ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; -}; - struct optimize_block_args { VP9_COMMON *cm; MACROBLOCK *x; struct optimize_ctx *ctx; }; -static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, - int ss_txfrm_size, void *arg) { - const struct optimize_block_args* const args = arg; - MACROBLOCKD* const xd = &args->x->e_mbd; +void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb, + struct optimize_ctx *ctx) { + MACROBLOCKD* const xd = &mb->e_mbd; int x, y; // find current entropy context txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); - optimize_b(args->cm, args->x, plane, block, bsize, - &args->ctx->ta[plane][x], &args->ctx->tl[plane][y], + optimize_b(cm, mb, plane, block, bsize, + &ctx->ta[plane][x], &ctx->tl[plane][y], ss_txfrm_size / 2); } +static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + const struct optimize_block_args* const args = arg; + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, args->x, + args->ctx); +} + void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, struct optimize_ctx *ctx) { int p; @@ -553,77 +555,127 @@ void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg); } -#if !CONFIG_SB8X8 -void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - - if (tx_size == TX_16X16) { - vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16); - vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) { - vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); - } - vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); - } else if (tx_size == TX_8X8) { - vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); - if (xd->mode_info_context->mbmi.mode == SPLITMV) { - assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4); - vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); - } else { - vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); - } - } else { - vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) { - vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); - } - vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); +struct encode_b_args { + VP9_COMMON *cm; + MACROBLOCK *x; + struct optimize_ctx *ctx; +}; + +static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args* const args = arg; + MACROBLOCK* const x = args->x; + MACROBLOCKD* const xd = &x->e_mbd; + const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x); + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + int16_t* const src_diff = raster_block_offset_int16(xd, bsize, plane, + raster_block, + x->plane[plane].src_diff); + int16_t* const diff = raster_block_offset_int16(xd, bsize, plane, + raster_block, + xd->plane[plane].diff); + TX_TYPE tx_type = DCT_DCT; + + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_fdct32x32(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + break; + case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) { + vp9_short_fht16x16(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw, tx_type); + } else { + x->fwd_txm16x16(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + } + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) { + vp9_short_fht8x8(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw, tx_type); + } else { + x->fwd_txm8x8(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + } + break; + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) { + vp9_short_fht4x4(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw, tx_type); + } else { + x->fwd_txm4x4(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + } + break; + default: + assert(0); } -} -void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int mi_row, int mi_col) { - MACROBLOCKD *const xd = &x->e_mbd; + vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type); + if (x->optimize) + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx); - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); - vp9_subtract_sb(x, BLOCK_SIZE_MB16X16); - vp9_fidct_mb(cm, x); - vp9_recon_sb(xd, BLOCK_SIZE_MB16X16); + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw * 2); + break; + case TX_16X16: + if (tx_type == DCT_DCT) { + vp9_short_idct16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw * 2); + } else { + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw, tx_type); + } + break; + case TX_8X8: + if (tx_type == DCT_DCT) { + vp9_short_idct8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw * 2); + } else { + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw, tx_type); + } + break; + case TX_4X4: + if (tx_type == DCT_DCT) { + // this is like vp9_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. + vp9_inverse_transform_b_4x4(xd, xd->plane[plane].eobs[block], + BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), diff, bw * 2); + } else { + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw, tx_type); + } + break; + } } -#endif -/* this function is used by first pass only */ -void vp9_encode_inter16x16y(MACROBLOCK *x, int mi_row, int mi_col) { - MACROBLOCKD *xd = &x->e_mbd; +void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); - vp9_subtract_sby(x, BLOCK_SIZE_MB16X16); + vp9_subtract_sb(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); - vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); + foreach_transformed_block(xd, bsize, encode_block, &arg); - vp9_recon_sby(xd, BLOCK_SIZE_MB16X16); + vp9_recon_sb(xd, bsize); } diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index b1d8771e0..832247940 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -23,14 +23,8 @@ typedef struct { } MODE_DEFINITION; -struct VP9_ENCODER_RTCD; #if !CONFIG_SB8X8 -void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int mb_row, int mb_col); #endif - -void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); - void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); @@ -40,14 +34,21 @@ void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +struct optimize_ctx { + ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; +}; +void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + struct optimize_ctx *ctx); +void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *x, + struct optimize_ctx *ctx); void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -#if !CONFIG_SB8X8 -void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); -#endif +void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_subtract_block(int rows, int cols, int16_t *diff_ptr, int diff_stride, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index e4d68630d..4d28f1b76 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -626,7 +626,10 @@ void vp9_first_pass(VP9_COMP *cpi) { this_error = motion_error; vp9_set_mbmode_and_mvs(x, NEWMV, &mv); xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_encode_inter16x16y(x, mb_row, mb_col); + vp9_build_inter_predictors_sby(xd, mb_row << CONFIG_SB8X8, + mb_col << CONFIG_SB8X8, + BLOCK_SIZE_MB16X16); + vp9_encode_sb(cm, x, BLOCK_SIZE_MB16X16); sum_mvr += mv.as_mv.row; sum_mvr_abs += abs(mv.as_mv.row); sum_mvc += mv.as_mv.col; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index fe8ba4b64..2de01d09c 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -75,6 +75,43 @@ static void quantize(int16_t *zbin_boost_orig_ptr, *eob_ptr = eob + 1; } +void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs, + TX_TYPE tx_type) { + MACROBLOCKD *const xd = &mb->e_mbd; + const int mul = n_coeffs == 1024 ? 2 : 1; + const int *scan; + + // These contexts may be available in the caller + switch (n_coeffs) { + case 4 * 4: + scan = get_scan_4x4(tx_type); + break; + case 8 * 8: + scan = get_scan_8x8(tx_type); + break; + case 16 * 16: + scan = get_scan_16x16(tx_type); + break; + default: + scan = vp9_default_zig_zag1d_32x32; + break; + } + + quantize(mb->plane[plane].zrun_zbin_boost, + BLOCK_OFFSET(mb->plane[plane].coeff, block, 16), + n_coeffs, mb->skip_block, + mb->plane[plane].zbin, + mb->plane[plane].round, + mb->plane[plane].quant, + mb->plane[plane].quant_shift, + BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16), + BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + xd->plane[plane].dequant, + mb->plane[plane].zbin_extra, + &xd->plane[plane].eobs[block], + scan, mul); +} + void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index fb74cbd29..718a1272d 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -22,6 +22,9 @@ #define prototype_quantize_mb(sym) \ void (sym)(MACROBLOCK *x) +void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coefs, + TX_TYPE tx_type); + void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2, int y_blocks); void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,