From 8de7dbbec1bc754826227c67cba74ad8a225cfde Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Tue, 26 Aug 2008 14:51:29 -0400 Subject: [PATCH] Activate trellis in p8x8 qpel RD Also clean up macroblock.c with some refactoring Note that this change significantly reduces subme7+trellis2 performance, but improves quality. Issue originally reported by Alex_W. --- encoder/macroblock.c | 78 +++++++++++++++++++++++--------------------- encoder/macroblock.h | 6 ++-- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/encoder/macroblock.c b/encoder/macroblock.c index 27b41ac4..a353ce71 100644 --- a/encoder/macroblock.c +++ b/encoder/macroblock.c @@ -79,7 +79,25 @@ static int x264_mb_decimate_score( int16_t *dct, int i_max ) return i_score; } -void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) +static ALWAYS_INLINE void x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra ) +{ + int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY; + if( h->mb.b_trellis ) + x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra ); + else + h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] ); +} + +static ALWAYS_INLINE void x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra ) +{ + int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY; + if( h->mb.b_trellis ) + x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra ); + else + h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] ); +} + +void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp ) { uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]]; uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]]; @@ -93,15 +111,12 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) h->dctf.sub4x4_dct( dct4x4, p_src, p_dst ); - if( h->mb.b_trellis ) - x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 ); - else - h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] ); + x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1 ); if( array_non_zero( dct4x4 ) ) { h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 ); - h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale ); + h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* output samples to fdec */ h->dctf.add4x4_idct( p_dst, dct4x4 ); @@ -110,7 +125,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) memset( h->dct.luma4x4[idx], 0, sizeof(h->dct.luma4x4[idx])); } -void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale ) +void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp ) { int x = 8 * (idx&1); int y = 8 * (idx>>1); @@ -120,17 +135,14 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale ) h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst ); - if( h->mb.b_trellis ) - x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 ); - else - h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8IY][i_qscale], h->quant8_bias[CQM_8IY][i_qscale] ); + x264_quant_8x8( h, dct8x8, i_qp, 1 ); h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 ); - h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale ); + h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp ); h->dctf.add8x8_idct8( p_dst, dct8x8 ); } -static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) +static void x264_mb_encode_i16x16( x264_t *h, int i_qp ) { uint8_t *p_src = h->mb.pic.p_fenc[0]; uint8_t *p_dst = h->mb.pic.p_fdec[0]; @@ -162,22 +174,19 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) dct4x4[i][0][0] = 0; /* quant/scan/dequant */ - if( h->mb.b_trellis ) - x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 ); - else - h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] ); + x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1 ); h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] ); - h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qscale ); + h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp ); } h->dctf.dct4x4dc( dct_dc4x4 ); - h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qscale][0]>>1, h->quant4_bias[CQM_4IY][i_qscale][0]<<1 ); + h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 ); h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 ); /* output samples to fdec */ h->dctf.idct4x4dc( dct_dc4x4 ); - x264_mb_dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qscale ); /* XXX not inversed */ + x264_mb_dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* XXX not inversed */ /* calculate dct coeffs */ for( i = 0; i < 16; i++ ) @@ -189,7 +198,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) h->dctf.add16x16_idct( p_dst, dct4x4 ); } -void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) +void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp ) { int i, ch; int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate); @@ -225,22 +234,20 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) dct4x4[i][0][0] = 0; /* no trellis; it doesn't seem to help chroma noticeably */ - h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qscale], h->quant4_bias[CQM_4IC+b_inter][i_qscale] ); + h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] ); h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] ); if( b_decimate ) - { i_decimate_score += x264_mb_decimate_score( h->dct.luma4x4[16+i+ch*4]+1, 15 ); - } } h->dctf.dct2x2dc( dct2x2 ); - h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qscale][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qscale][0]<<1 ); + h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 ); zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 ); /* output samples to fdec */ h->dctf.idct2x2dc( dct2x2 ); - x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); /* XXX not inversed */ + x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ); /* XXX not inversed */ if( b_decimate && i_decimate_score < 7 ) { @@ -253,7 +260,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) else { for( i = 0; i < 4; i++ ) - h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); + h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp ); } dct4x4[0][0][0] = dct2x2[0][0]; dct4x4[1][0][0] = dct2x2[0][1]; @@ -446,10 +453,7 @@ void x264_macroblock_encode( x264_t *h ) { if( h->mb.b_noise_reduction ) h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 ); - if( h->mb.b_trellis ) - x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 ); - else - h->quantf.quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] ); + x264_quant_8x8( h, dct8x8[idx], i_qp, 0 ); h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] ); @@ -494,10 +498,7 @@ void x264_macroblock_encode( x264_t *h ) if( h->mb.b_noise_reduction ) h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 ); - if( h->mb.b_trellis ) - x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 ); - else - h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ); + x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0 ); h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] ); @@ -776,10 +777,10 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) { DECLARE_ALIGNED_16( int16_t dct8x8[8][8] ); h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec ); - h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] ); + x264_quant_8x8( h, dct8x8, i_qp, 0 ); h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 ); - if( b_decimate ) + if( b_decimate && !h->mb.b_trellis ) nnz8x8 = 4 <= x264_mb_decimate_score( h->dct.luma8x8[i8], 64 ); else nnz8x8 = array_non_zero( dct8x8 ); @@ -796,7 +797,8 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] ); h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec ); for( i4 = 0; i4 < 4; i4++ ) - h->quantf.quant_4x4( dct4x4[i4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ); + x264_quant_4x4( h, dct4x4[i4], i_qp, DCT_LUMA_4x4, 0 ); + for( i4 = 0; i4 < 4; i4++ ) h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] ); diff --git a/encoder/macroblock.h b/encoder/macroblock.h index 28b804b0..e2b9d318 100644 --- a/encoder/macroblock.h +++ b/encoder/macroblock.h @@ -43,9 +43,9 @@ void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb ); void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s ); void x264_macroblock_encode_p8x8( x264_t *h, int i8 ); -void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ); -void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale ); -void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ); +void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp ); +void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp ); +void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp ); void x264_cabac_mb_skip( x264_t *h, int b_skip ); -- 2.40.0