From: Henrik Gramner Date: Sun, 30 May 2010 20:45:14 +0000 (+0200) Subject: Some deblocking-related optimizations X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=156b119f3de35e458f037e87d1ccf467ad86da5b;p=libx264 Some deblocking-related optimizations --- diff --git a/common/deblock.c b/common/deblock.c index adb125bc..9e3a73e2 100644 --- a/common/deblock.c +++ b/common/deblock.c @@ -299,7 +299,7 @@ static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, void x264_frame_deblock_row( x264_t *h, int mb_y ) { int b_interlaced = h->sh.b_mbaff; - int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset); + int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->param.analyse.i_chroma_qp_offset ); int stridey = h->fdec->i_stride[0]; int stride2y = stridey << b_interlaced; int strideuv = h->fdec->i_stride[1]; @@ -318,7 +318,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x; uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x; uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x; - if( b_interlaced && (mb_y&1) ) + if( mb_y & b_interlaced ) { pixy -= 15*stridey; pixu -= 7*strideuv; @@ -366,12 +366,12 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) int qp_top = (qp + qpt + 1) >> 1; int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1; int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] ); - if( !b_interlaced && (intra_cur || intra_top) ) + if( ~b_interlaced & (intra_cur | intra_top) ) FILTER( _intra, 1, 0, qp_top, qpc_top ); else { if( intra_top ) - memset( bs[1][0], 3, sizeof(bs[1][0]) ); + M32( bs[1][0] ) = 0x03030303; FILTER( , 1, 0, qp_top, qpc_top ); } } diff --git a/common/macroblock.c b/common/macroblock.c index 6590895e..6e73ec4d 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -400,9 +400,27 @@ void x264_macroblock_slice_init( x264_t *h ) } } } - if( h->sh.i_type == SLICE_TYPE_P ) + else if( h->sh.i_type == SLICE_TYPE_P ) + { memset( h->mb.cache.skip, 0, sizeof( h->mb.cache.skip ) ); + if( h->sh.i_disable_deblocking_filter_idc != 1 && h->param.analyse.i_weighted_pred ) + { + deblock_ref_table(-2) = -2; + deblock_ref_table(-1) = -1; + for( int i = 0; i < h->i_ref0 << h->sh.b_mbaff; i++ ) + { + /* Mask off high bits to avoid frame num collisions with -1/-2. + * In current x264 frame num values don't cover a range of more + * than 32, so 6 bits is enough for uniqueness. */ + if( !h->mb.b_interlaced ) + deblock_ref_table(i) = h->fref0[i]->i_frame_num&63; + else + deblock_ref_table(i) = ((h->fref0[i>>1]->i_frame_num&63)<<1) + (i&1); + } + } + } + /* init with not available (for top right idx=7,15) */ memset( h->mb.cache.ref, -2, sizeof( h->mb.cache.ref ) ); @@ -418,19 +436,6 @@ void x264_macroblock_slice_init( x264_t *h ) h->fdec->inv_ref_poc[field] = (256 + delta/2) / delta; } - deblock_ref_table(-2) = -2; - deblock_ref_table(-1) = -1; - for( int i = 0; i < h->i_ref0 << h->sh.b_mbaff; i++ ) - { - /* Mask off high bits to avoid frame num collisions with -1/-2. - * In current x264 frame num values don't cover a range of more - * than 32, so 6 bits is enough for uniqueness. */ - if( !h->mb.b_interlaced ) - deblock_ref_table(i) = h->fref0[i]->i_frame_num&63; - else - deblock_ref_table(i) = ((h->fref0[i>>1]->i_frame_num&63)<<1) + (i&1); - } - h->mb.i_neighbour4[6] = h->mb.i_neighbour4[9] = h->mb.i_neighbour4[12] = @@ -894,7 +899,6 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y ) void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y ) { int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2; - int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x; h->mb.i_neighbour = 0; h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x; @@ -906,9 +910,9 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_ h->mb.i_neighbour |= MB_LEFT; } - if( top >= 0 ) + if( mb_y > h->mb.b_interlaced ) { - h->mb.i_mb_top_xy = top; + h->mb.i_mb_top_xy = h->mb.i_mb_xy - (h->mb.i_mb_stride << h->mb.b_interlaced); if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_top_xy] == h->mb.slice_table[h->mb.i_mb_xy] ) h->mb.i_neighbour |= MB_TOP; } @@ -930,8 +934,6 @@ void x264_macroblock_cache_load_deblock( x264_t *h ) h->mb.i_neighbour &= ~old_neighbour; if( h->mb.i_neighbour ) { - int left = h->mb.i_mb_left_xy; - int top = h->mb.i_mb_top_xy; int top_y = mb_y - (1 << h->mb.b_interlaced); int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x; int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x; @@ -941,10 +943,11 @@ void x264_macroblock_cache_load_deblock( x264_t *h ) uint8_t (*nnz)[24] = h->mb.non_zero_count; if( h->mb.i_neighbour & MB_TOP ) - CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[top][12] ); + CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] ); if( h->mb.i_neighbour & MB_LEFT ) { + int left = h->mb.i_mb_left_xy; h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3]; h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7]; h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];