From: Fiona Glaser
Date: Wed, 19 Mar 2008 21:01:05 +0000 (-0600)
Subject: skip intra pred+dct+quant in cases where it's redundant (analyse vs encode)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1d56ef44748dd3ae36751f27263ccefc22d5f543;p=libx264

skip intra pred+dct+quant in cases where it's redundant (analyse vs encode)
large speedup with trellis=2, small speedup with trellis=0 and/or subme>=6
---

diff --git a/common/common.h b/common/common.h
index 584bcd76..7c4be09a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -424,6 +424,12 @@ struct x264_t
         int i_intra16x16_pred_mode;
         int i_chroma_pred_mode;
 
+        /* skip flags for i4x4 and i8x8
+         * 0 = encode as normal.
+         * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
+         * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
+        int i_skip_intra;
+
         struct
         {
             /* space for p_fenc and p_fdec */
@@ -432,6 +438,12 @@ struct x264_t
             DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 );
             DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 );
 
+            /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
+            DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 );
+            DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 );
+            DECLARE_ALIGNED( int, i8x8_dct_buf[3][64], 16 );
+            DECLARE_ALIGNED( int, i4x4_dct_buf[15][16], 16 );
+
             /* pointer over mb of the frame to be compressed */
             uint8_t *p_fenc[3];
 
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 741ab1eb..02646211 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -229,6 +229,10 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
     a->i_satd_i8x8chroma = COST_MAX;
 
     a->b_fast_intra = 0;
+    h->mb.i_skip_intra =
+        h->mb.b_lossless ? 0 :
+        a->b_mbrd ? 2 :
+        !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;
 
     /* II: Inter part P/B frame */
     if( h->sh.i_type != SLICE_TYPE_I )
@@ -646,7 +650,15 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
             }
 
             if( idx == 3 )
+            {
                 a->i_satd_i8x8 = i_cost;
+                if( h->mb.i_skip_intra )
+                {
+                    h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
+                    if( h->mb.i_skip_intra == 2 )
+                        h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
+                }
+            }
             else
             {
                 a->i_satd_i8x8 = COST_MAX;
@@ -723,7 +735,15 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
                 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
             }
             if( idx == 15 )
+            {
                 a->i_satd_i4x4 = i_cost;
+                if( h->mb.i_skip_intra )
+                {
+                    h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
+                    if( h->mb.i_skip_intra == 2 )
+                        h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.block, sizeof(h->mb.pic.i4x4_dct_buf) );
+                }
+            }
             else
                 a->i_satd_i4x4 = COST_MAX;
         }
@@ -768,6 +788,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
     int i_max, i_satd, i_best, i_mode, i_thresh;
     int i_pred_mode;
     int predict_mode[9];
+    h->mb.i_skip_intra = 0;
 
     if( h->mb.i_type == I_16x16 )
     {
@@ -2569,6 +2590,8 @@ void x264_macroblock_analyse( x264_t *h )
 
         h->mb.b_trellis = h->param.analyse.i_trellis;
         h->mb.b_noise_reduction = h->param.analyse.i_noise_reduction;
+        if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
+            h->mb.i_skip_intra = 0;
     }
 
     /*-------------------- Update MB from the analysis ----------------------*/
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 67bc2d83..d095b7c2 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -368,7 +368,15 @@ void x264_macroblock_encode( x264_t *h )
     {
         DECLARE_ALIGNED( uint8_t, edge[33], 16 );
         h->mb.b_transform_8x8 = 1;
-        for( i = 0; i < 4; i++ )
+        /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
+        if( h->mb.i_skip_intra )
+        {
+            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
+            /* In RD mode, restore the now-overwritten DCT data. */
+            if( h->mb.i_skip_intra == 2 )
+                h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
+        }
+        for( i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
         {
             uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
             int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
@@ -381,7 +389,15 @@ void x264_macroblock_encode( x264_t *h )
     else if( h->mb.i_type == I_4x4 )
     {
         h->mb.b_transform_8x8 = 0;
-        for( i = 0; i < 16; i++ )
+        /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
+        if( h->mb.i_skip_intra )
+        {
+            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
+            /* In RD mode, restore the now-overwritten DCT data. */
+            if( h->mb.i_skip_intra == 2 )
+                h->mc.memcpy_aligned( h->dct.block, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
+        }
+        for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
         {
             uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * FDEC_STRIDE];
             int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];