From e30bb5318d32ae107560c7242b2f361abed0c6a6 Mon Sep 17 00:00:00 2001 From: Simon Horlick Date: Wed, 16 Mar 2011 21:18:59 +0000 Subject: [PATCH] MBAFF: Add mbaff deblock strength calculation Move call to deblock_strength to x264_macroblock_deblock_strength to keep deblock strength calculation in one place. --- common/deblock.c | 57 +++++++++++++++++++++++++++++++++++++++------ common/frame.h | 4 ++-- common/macroblock.c | 12 ++++++++-- common/macroblock.h | 2 +- encoder/encoder.c | 13 ++--------- tools/checkasm.c | 6 ++--- 6 files changed, 68 insertions(+), 26 deletions(-) diff --git a/common/deblock.c b/common/deblock.c index 78d84e27..71f22e64 100644 --- a/common/deblock.c +++ b/common/deblock.c @@ -243,7 +243,7 @@ static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int bet static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit, - int bframe ) + int bframe, x264_t *h ) { for( int dir = 0; dir < 2; dir++ ) { @@ -269,6 +269,47 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264 } } } +void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], + int mvy_limit, int bframe, x264_t *h ) +{ + int neighbour_field[2]; + neighbour_field[0] = h->mb.i_mb_left_xy[0] >= 0 && h->mb.field[h->mb.i_mb_left_xy[0]]; + neighbour_field[1] = h->mb.i_mb_top_xy >= 0 && h->mb.field[h->mb.i_mb_top_xy]; + int intra_cur = IS_INTRA( h->mb.i_type ); + + if( !intra_cur ) + { + for( int dir = 0; dir < 2; dir++ ) + { + int edge_stride = dir ? 8 : 1; + int part_stride = dir ? 1 : 8; + for( int edge = 0; edge < 4; edge++ ) + { + for( int i = 0, q = X264_SCAN8_0+edge*edge_stride; i < 4; i++, q += part_stride ) + { + int p = q - edge_stride; + if( nnz_cache[q] || nnz_cache[p] ) + { + bs[dir][edge][i] = 2; + } + else if( (edge == 0 && h->mb.b_interlaced != neighbour_field[dir]) || + ref[0][q] != ref[0][p] || + abs( mv[0][q][0] - mv[0][p][0] ) >= 4 || + abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit || + (bframe && (ref[1][q] != ref[1][p] || + abs( mv[1][q][0] - mv[1][p][0] ) >= 4 || + abs( mv[1][q][1] - mv[1][p][1] ) >= mvy_limit )) ) + { + bs[dir][edge][i] = 1; + } + else + bs[dir][edge][i] = 0; + } + } + } + } +} static inline void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter ) { @@ -411,7 +452,7 @@ void x264_macroblock_deblock( x264_t *h ) memset( bs, 3, 2*4*4*sizeof(uint8_t) ); else h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, - bs, 4 >> h->sh.b_mbaff, h->sh.i_type == SLICE_TYPE_B ); + bs, 4 >> h->sh.b_mbaff, h->sh.i_type == SLICE_TYPE_B, h ); int transform_8x8 = h->mb.b_transform_8x8; pixel *fdec = h->mb.pic.p_fdec[0]; @@ -454,16 +495,16 @@ void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int be void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta ); void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], - int mvy_limit, int bframe ); + int mvy_limit, int bframe, x264_t *h ); void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], - int mvy_limit, int bframe ); + int mvy_limit, int bframe, x264_t *h ); void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], - int mvy_limit, int bframe ); + int mvy_limit, int bframe, x264_t *h ); void x264_deblock_strength_avx ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], - int mvy_limit, int bframe ); + int mvy_limit, int bframe, x264_t *h ); #if ARCH_X86 void x264_deblock_h_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 ); void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); @@ -505,7 +546,7 @@ void x264_deblock_v_chroma_neon( uint8_t *, int, int, int, int8_t * ); void x264_deblock_h_chroma_neon( uint8_t *, int, int, int, int8_t * ); #endif -void x264_deblock_init( int cpu, x264_deblock_function_t *pf ) +void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) { pf->deblock_luma[1] = deblock_v_luma_c; pf->deblock_luma[0] = deblock_h_luma_c; @@ -585,4 +626,6 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf ) } #endif #endif // !HIGH_BIT_DEPTH + + if( b_mbaff ) pf->deblock_strength = deblock_strength_mbaff_c; } diff --git a/common/frame.h b/common/frame.h index 4b86df0e..a6d0020e 100644 --- a/common/frame.h +++ b/common/frame.h @@ -183,7 +183,7 @@ typedef struct x264_deblock_intra_t deblock_chroma_intra[2]; void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit, - int bframe ); + int bframe, x264_t *h ); } x264_deblock_function_t; x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ); @@ -202,7 +202,7 @@ void x264_macroblock_deblock( x264_t *h ); void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end ); void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame ); -void x264_deblock_init( int cpu, x264_deblock_function_t *pf ); +void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ); void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ); void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); diff --git a/common/macroblock.c b/common/macroblock.c index 0ae560b7..4f3099f2 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -1224,10 +1224,14 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_ } } -void x264_macroblock_cache_load_deblock( x264_t *h ) +void x264_macroblock_deblock_strength( x264_t *h ) { + uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x]; if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) ) - return; + { + memset( bs, 3, 2*4*4*sizeof(uint8_t) ); + if( !h->sh.b_mbaff ) return; + } /* If we have multiple slices and we're deblocking on slice edges, we * have to reload neighbour data. */ @@ -1367,6 +1371,10 @@ void x264_macroblock_cache_load_deblock( x264_t *h ) M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*3] ) = nnzbot; } } + + int mvy_limit = 4 >> h->mb.b_interlaced; + h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, + bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B, h ); } static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_mbaff ) diff --git a/common/macroblock.h b/common/macroblock.h index 5fec0ac1..a60226a2 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -291,7 +291,7 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead ); void x264_macroblock_slice_init( x264_t *h ); void x264_macroblock_thread_init( x264_t *h ); void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y ); -void x264_macroblock_cache_load_deblock( x264_t *h ); +void x264_macroblock_deblock_strength( x264_t *h ); void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y ); void x264_macroblock_cache_save( x264_t *h ); diff --git a/encoder/encoder.c b/encoder/encoder.c index ee1ac22e..d77335c3 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -1099,7 +1099,7 @@ x264_t *x264_encoder_open( x264_param_t *param ) x264_zigzag_init( h->param.cpu, &h->zigzagf, h->param.b_interlaced ); x264_mc_init( h->param.cpu, &h->mc ); x264_quant_init( h, h->param.cpu, &h->quantf ); - x264_deblock_init( h->param.cpu, &h->loopf ); + x264_deblock_init( h->param.cpu, &h->loopf, h->param.b_interlaced ); x264_bitstream_init( h->param.cpu, &h->bsf ); x264_dct_init_weights(); @@ -2224,16 +2224,7 @@ reencode: /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */ if( b_deblock ) - { - int mvy_limit = 4 >> h->sh.b_mbaff; - uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x]; - x264_macroblock_cache_load_deblock( h ); - if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) ) - memset( bs, 3, 2*4*4*sizeof(uint8_t) ); - else - h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, - bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B ); - } + x264_macroblock_deblock_strength( h ); x264_ratecontrol_mb( h, mb_size ); diff --git a/tools/checkasm.c b/tools/checkasm.c index 3cf246b0..0fcd7490 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -1278,9 +1278,9 @@ static int check_deblock( int cpu_ref, int cpu_new ) int alphas[36], betas[36]; int8_t tcs[36][4]; - x264_deblock_init( 0, &db_c ); - x264_deblock_init( cpu_ref, &db_ref ); - x264_deblock_init( cpu_new, &db_a ); + x264_deblock_init( 0, &db_c, 0 ); + x264_deblock_init( cpu_ref, &db_ref, 0 ); + x264_deblock_init( cpu_new, &db_a, 0 ); /* not exactly the real values of a,b,tc but close enough */ for( int i = 35, a = 255, c = 250; i >= 0; i-- ) -- 2.40.0