From: Fiona Glaser Date: Thu, 14 Jul 2011 21:04:11 +0000 (-0700) Subject: Clean up MBAFF deblocking code X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=75466d2e4fff1aeba7e64a1947e8beea3f1235ff;p=libx264 Clean up MBAFF deblocking code --- diff --git a/common/deblock.c b/common/deblock.c index a14ecc27..0626daab 100644 --- a/common/deblock.c +++ b/common/deblock.c @@ -269,7 +269,7 @@ static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int bet static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, - int bframe, x264_t *h ) + int bframe ) { for( int dir = 0; dir < 2; dir++ ) { @@ -296,162 +296,6 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264 } } -void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe, x264_t *h ) -{ - int neighbour_field[2]; - neighbour_field[0] = h->mb.i_mb_left_xy[0] >= 0 && h->mb.field[h->mb.i_mb_left_xy[0]]; - neighbour_field[1] = h->mb.i_mb_top_xy >= 0 && h->mb.field[h->mb.i_mb_top_xy]; - int intra_cur = IS_INTRA( h->mb.i_type ); - - if( !intra_cur ) - { - for( int dir = 0; dir < 2; dir++ ) - { - int edge_stride = dir ? 8 : 1; - int part_stride = dir ? 1 : 8; - for( int edge = 0; edge < 4; edge++ ) - { - for( int i = 0, q = X264_SCAN8_0+edge*edge_stride; i < 4; i++, q += part_stride ) - { - int p = q - edge_stride; - if( nnz_cache[q] || nnz_cache[p] ) - { - bs[dir][edge][i] = 2; - } - else if( (edge == 0 && MB_INTERLACED != neighbour_field[dir]) || - ref[0][q] != ref[0][p] || - abs( mv[0][q][0] - mv[0][p][0] ) >= 4 || - abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit || - (bframe && (ref[1][q] != ref[1][p] || - abs( mv[1][q][0] - mv[1][p][0] ) >= 4 || - abs( mv[1][q][1] - mv[1][p][1] ) >= mvy_limit )) ) - { - bs[dir][edge][i] = 1; - } - else - bs[dir][edge][i] = 0; - } - } - } - } - - if( h->mb.i_neighbour & MB_LEFT ) - { - if( h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED ) - { - static const uint8_t offset[2][2][8] = { - { { 0, 0, 0, 0, 1, 1, 1, 1 }, - { 2, 2, 2, 2, 3, 3, 3, 3 }, }, - { { 0, 1, 2, 3, 0, 1, 2, 3 }, - { 0, 1, 2, 3, 0, 1, 2, 3 }, } - }; - uint8_t bS[8]; - - if( intra_cur ) - memset( bS, 4, 8 ); - else - { - const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1]; - uint8_t (*nnz)[48] = h->mb.non_zero_count; - - for( int i = 0; i < 8; i++ ) - { - int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1]; - int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)]; - int nnz_left = nnz[left][3 + 4*off[i]]; - if( !h->param.b_cabac && h->pps->b_transform_8x8_mode ) - { - int j = off[i]&~1; - if( h->mb.mb_transform_size[left] ) - nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] )); - } - if( IS_INTRA( h->mb.type[left] ) ) - bS[i] = 4; - else if( nnz_left || nnz_this ) - bS[i] = 2; - else // As left is different interlaced. - bS[i] = 1; - } - } - - if( MB_INTERLACED ) - { - for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[i]; - for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[4+i]; - } - else - { - for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[2*i]; - for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[1+2*i]; - } - } - } - - if( h->mb.i_neighbour & MB_TOP ) - { - if( !(h->mb.i_mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] ) - { - /* Need to filter both fields (even for frame macroblocks). - * Filter top two rows using the top macroblock of the above - * pair and then the bottom one. */ - int mbn_xy = h->mb.i_mb_xy - 2 * h->mb.i_mb_stride; - uint32_t nnz_cur[4]; - nnz_cur[0] = h->mb.cache.non_zero_count[x264_scan8[0]+0]; - nnz_cur[1] = h->mb.cache.non_zero_count[x264_scan8[0]+1]; - nnz_cur[2] = h->mb.cache.non_zero_count[x264_scan8[0]+2]; - nnz_cur[3] = h->mb.cache.non_zero_count[x264_scan8[0]+3]; - /* Munge NNZ for cavlc + 8x8dct */ - if( !h->param.b_cabac && h->pps->b_transform_8x8_mode && - h->mb.mb_transform_size[h->mb.i_mb_xy] ) - { - int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ); - int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] ); - nnz_cur[0] = nnz_cur[1] = !!nnz0; - nnz_cur[2] = nnz_cur[3] = !!nnz1; - } - - for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride ) - { - int mbn_intra = IS_INTRA( h->mb.type[mbn_xy] ); - uint8_t (*nnz)[48] = h->mb.non_zero_count; - - uint32_t nnz_top[4]; - nnz_top[0] = nnz[mbn_xy][3*4+0]; - nnz_top[1] = nnz[mbn_xy][3*4+1]; - nnz_top[2] = nnz[mbn_xy][3*4+2]; - nnz_top[3] = nnz[mbn_xy][3*4+3]; - - if( !h->param.b_cabac && h->pps->b_transform_8x8_mode && - (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[mbn_xy] ) - { - int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] ); - int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] ); - nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0; - nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0; - } - - uint8_t bS[4]; - if( intra_cur || mbn_intra ) - M32( bS ) = 0x03030303; - else - { - for( int i = 0; i < 4; i++ ) - { - if( nnz_cur[i] || nnz_top[i] ) - bS[i] = 2; - else - bS[i] = 1; - } - } - for( int i = 0; i < 4; i++ ) - bs[1][4*j][i] = bS[i]; - } - } - } -} - static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int a, int b, int b_chroma, x264_deblock_inter_t pf_inter ) { int index_a = i_qp + a; @@ -557,7 +401,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]]; luma_qp[0] = (qp + left_qp[0] + 1) >> 1; chroma_qp[0] = (qpc + h->chroma_qp_table[left_qp[0]] + 1) >> 1; - if( bs[0][0][0] == 4 ) + if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] ) ) { deblock_edge_intra( h, pixy, 2*stridey, bs[0][0], luma_qp[0], a, b, 0, luma_intra_deblock ); deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock ); @@ -576,7 +420,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]]; luma_qp[1] = (qp + left_qp[1] + 1) >> 1; chroma_qp[1] = (qpc + h->chroma_qp_table[left_qp[1]] + 1) >> 1; - if( bs[0][4][0] == 4) + if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[1]] ) ) { deblock_edge_intra( h, pixy + (stridey<mb.i_mb_stride; - for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride) + for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride ) { int qpt = h->mb.qp[mbn_xy]; int qp_top = (qp + qpt + 1) >> 1; int qpc_top = (qpc + h->chroma_qp_table[qpt] + 1) >> 1; + int intra_top = IS_INTRA( h->mb.type[mbn_xy] ); + if( intra_cur || intra_top ) + M32( bs[1][4*j] ) = 0x03030303; // deblock the first horizontal edge of the even rows, then the first horizontal edge of the odd rows deblock_edge( h, pixy + j*stridey, 2* stridey, bs[1][4*j], qp_top, a, b, 0, deblock_v_luma_c ); @@ -646,7 +493,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) } else { - if( intra_top ) + if( intra_cur || intra_top ) M32( bs[1][0] ) = 0x03030303; FILTER( , 1, 0, qp_top, qpc_top ); } @@ -691,7 +538,7 @@ void x264_macroblock_deblock( x264_t *h ) } else h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, - bs, 4 >> SLICE_MBAFF, h->sh.i_type == SLICE_TYPE_B, h ); + bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B ); int transform_8x8 = h->mb.b_transform_8x8; @@ -742,16 +589,16 @@ void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int be void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta ); void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe, x264_t *h ); + int mvy_limit, int bframe ); void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe, x264_t *h ); + int mvy_limit, int bframe ); void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe, x264_t *h ); + int mvy_limit, int bframe ); void x264_deblock_strength_avx ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe, x264_t *h ); + int mvy_limit, int bframe ); #if ARCH_X86 void x264_deblock_h_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 ); void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); @@ -873,6 +720,4 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) } #endif #endif // !HIGH_BIT_DEPTH - - if( b_mbaff ) pf->deblock_strength = deblock_strength_mbaff_c; } diff --git a/common/frame.h b/common/frame.h index db066f39..fb27df8f 100644 --- a/common/frame.h +++ b/common/frame.h @@ -184,7 +184,7 @@ typedef struct x264_deblock_intra_t deblock_chroma_intra[2]; void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, - int bframe, x264_t *h ); + int bframe ); } x264_deblock_function_t; void x264_frame_delete( x264_frame_t *frame ); diff --git a/common/macroblock.c b/common/macroblock.c index 8d1c157e..7ac64908 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -1375,14 +1375,90 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_ h->mb.i_neighbour |= MB_TOP; } +static void x264_macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][4] ) +{ + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED ) + { + static const uint8_t offset[2][2][8] = + { { { 0, 0, 0, 0, 1, 1, 1, 1 }, + { 2, 2, 2, 2, 3, 3, 3, 3 }, }, + { { 0, 1, 2, 3, 0, 1, 2, 3 }, + { 0, 1, 2, 3, 0, 1, 2, 3 }, } + }; + ALIGNED_ARRAY_8( uint8_t, tmpbs, [8] ); + + const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1]; + uint8_t (*nnz)[48] = h->mb.non_zero_count; + + for( int i = 0; i < 8; i++ ) + { + int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1]; + int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)]; + int nnz_left = nnz[left][3 + 4*off[i]]; + if( !h->param.b_cabac && h->pps->b_transform_8x8_mode ) + { + int j = off[i]&~1; + if( h->mb.mb_transform_size[left] ) + nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] )); + } + tmpbs[i] = (nnz_left || nnz_this) ? 2 : 1; + } + + if( MB_INTERLACED ) + { + CP32( bs[0][0], &tmpbs[0] ); + CP32( bs[0][4], &tmpbs[4] ); + } + else + { + for( int i = 0; i < 4; i++ ) bs[0][0][i] = tmpbs[2*i]; + for( int i = 0; i < 4; i++ ) bs[0][4][i] = tmpbs[1+2*i]; + } + } + + if( (h->mb.i_neighbour & MB_TOP) && MB_INTERLACED != h->mb.field[h->mb.i_mb_top_xy] ) + { + if( !(h->mb.i_mb_y&1) && !MB_INTERLACED ) + { + /* Need to filter both fields (even for frame macroblocks). + * Filter top two rows using the top macroblock of the above + * pair and then the bottom one. */ + int mbn_xy = h->mb.i_mb_xy - 2 * h->mb.i_mb_stride; + uint8_t *nnz_cur = &h->mb.cache.non_zero_count[x264_scan8[0]]; + + for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride ) + { + uint8_t (*nnz)[48] = h->mb.non_zero_count; + + ALIGNED_4( uint8_t nnz_top[4] ); + CP32( nnz_top, &nnz[mbn_xy][3*4] ); + + if( !h->param.b_cabac && h->pps->b_transform_8x8_mode && h->mb.mb_transform_size[mbn_xy] ) + { + int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] ); + int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] ); + nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0; + nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0; + } + + for( int i = 0; i < 4; i++ ) + bs[1][4*j][i] = (nnz_cur[i] || nnz_top[i]) ? 2 : 1; + } + } + else + for( int i = 0; i < 4; i++ ) + bs[1][0][i] = X264_MAX( bs[1][0][i], 1 ); + } +} + void x264_macroblock_deblock_strength( x264_t *h ) { uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x]; if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) ) { - memset( bs[0], 3, 4*4*sizeof(uint8_t) ); - memset( bs[1], 3, 4*4*sizeof(uint8_t) ); - if( !SLICE_MBAFF ) return; + memset( bs[0][1], 3, 3*4*sizeof(uint8_t) ); + memset( bs[1][1], 3, 3*4*sizeof(uint8_t) ); + return; } /* If we have multiple slices and we're deblocking on slice edges, we @@ -1528,9 +1604,11 @@ void x264_macroblock_deblock_strength( x264_t *h ) } } - int mvy_limit = 4 >> MB_INTERLACED; h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, - bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B, h ); + bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B ); + + if( SLICE_MBAFF ) + x264_macroblock_deblock_strength_mbaff( h, bs ); } static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff ) diff --git a/tools/checkasm.c b/tools/checkasm.c index 61b0f4b3..8a30e325 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -1396,8 +1396,8 @@ static int check_deblock( int cpu_ref, int cpu_new ) mv[j][k][l] = ((rand()&7) != 7) ? (rand()&7) - 3 : (rand()&1023) - 512; } set_func_name( "deblock_strength" ); - call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1), NULL ); - call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1), NULL ); + call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1) ); + call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1) ); if( memcmp( bs[0], bs[1], sizeof(bs[0]) ) ) { ok = 0;