From: Fiona Glaser Date: Thu, 4 Sep 2008 05:12:23 +0000 (-0700) Subject: CAVLC cleanup and optimizations X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5d0904bfda094b6243d9d8596c50edd4f0fe5528;p=libx264 CAVLC cleanup and optimizations Also move some small functions in macroblock.c to a .h file so they can be inlined. --- diff --git a/common/macroblock.c b/common/macroblock.c index 9d839f27..0806ea7e 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -24,71 +24,6 @@ #include "common.h" -int x264_mb_predict_intra4x4_mode( x264_t *h, int idx ) -{ - const int ma = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 1]; - const int mb = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 8]; - const int m = X264_MIN( x264_mb_pred_mode4x4_fix(ma), - x264_mb_pred_mode4x4_fix(mb) ); - - if( m < 0 ) - return I_PRED_4x4_DC; - - return m; -} - -int x264_mb_predict_non_zero_code( x264_t *h, int idx ) -{ - const int za = h->mb.cache.non_zero_count[x264_scan8[idx] - 1]; - const int zb = h->mb.cache.non_zero_count[x264_scan8[idx] - 8]; - - int i_ret = za + zb; - - if( i_ret < 0x80 ) - { - i_ret = ( i_ret + 1 ) >> 1; - } - return i_ret & 0x7f; -} - -int x264_mb_transform_8x8_allowed( x264_t *h ) -{ - // intra and skip are disallowed - // large partitions are allowed - // direct and 8x8 are conditional - static const uint8_t partition_tab[X264_MBTYPE_MAX] = { - 0,0,0,0,1,2,0,2,1,1,1,1,1,1,1,1,1,2,0, - }; - int p, i; - - if( !h->pps->b_transform_8x8_mode ) - return 0; - p = partition_tab[h->mb.i_type]; - if( p < 2 ) - return p; - else if( h->mb.i_type == B_DIRECT ) - return h->sps->b_direct8x8_inference; - else if( h->mb.i_type == P_8x8 ) - { - if( !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8) ) - return 1; - for( i=0; i<4; i++ ) - if( h->mb.i_sub_partition[i] != D_L0_8x8 ) - return 0; - return 1; - } - else // B_8x8 - { - // x264 currently doesn't use sub-8x8 B partitions, so don't check for them - if( h->sps->b_direct8x8_inference ) - return 1; - for( i=0; i<4; i++ ) - if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) - return 0; - return 1; - } -} - void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] ) { const int i8 = x264_scan8[idx]; diff --git a/common/macroblock.h b/common/macroblock.h index 0dad40fa..f38c0478 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -314,16 +314,6 @@ void x264_mb_load_mv_direct8x8( x264_t *h, int idx ); * h->mb. need only valid values from other blocks */ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc ); - -int x264_mb_predict_intra4x4_mode( x264_t *h, int idx ); -int x264_mb_predict_non_zero_code( x264_t *h, int idx ); - -/* x264_mb_transform_8x8_allowed: - * check whether any partition is smaller than 8x8 (or at least - * might be, according to just partition type.) - * doesn't check for cbp */ -int x264_mb_transform_8x8_allowed( x264_t *h ); - void x264_mb_mc( x264_t *h ); void x264_mb_mc_8x8( x264_t *h, int i8 ); @@ -446,6 +436,72 @@ static ALWAYS_INLINE int array_non_zero_count( int16_t *v ) return i_nz; } +static inline int x264_mb_predict_intra4x4_mode( x264_t *h, int idx ) +{ + const int ma = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 1]; + const int mb = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 8]; + const int m = X264_MIN( x264_mb_pred_mode4x4_fix(ma), + x264_mb_pred_mode4x4_fix(mb) ); + + if( m < 0 ) + return I_PRED_4x4_DC; + + return m; +} +static inline int x264_mb_predict_non_zero_code( x264_t *h, int idx ) +{ + const int za = h->mb.cache.non_zero_count[x264_scan8[idx] - 1]; + const int zb = h->mb.cache.non_zero_count[x264_scan8[idx] - 8]; + + int i_ret = za + zb; + + if( i_ret < 0x80 ) + { + i_ret = ( i_ret + 1 ) >> 1; + } + return i_ret & 0x7f; +} +/* x264_mb_transform_8x8_allowed: + * check whether any partition is smaller than 8x8 (or at least + * might be, according to just partition type.) + * doesn't check for cbp */ +static inline int x264_mb_transform_8x8_allowed( x264_t *h ) +{ + // intra and skip are disallowed + // large partitions are allowed + // direct and 8x8 are conditional + static const uint8_t partition_tab[X264_MBTYPE_MAX] = { + 0,0,0,0,1,2,0,2,1,1,1,1,1,1,1,1,1,2,0, + }; + int p, i; + + if( !h->pps->b_transform_8x8_mode ) + return 0; + p = partition_tab[h->mb.i_type]; + if( p < 2 ) + return p; + else if( h->mb.i_type == B_DIRECT ) + return h->sps->b_direct8x8_inference; + else if( h->mb.i_type == P_8x8 ) + { + if( !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8) ) + return 1; + for( i=0; i<4; i++ ) + if( h->mb.i_sub_partition[i] != D_L0_8x8 ) + return 0; + return 1; + } + else // B_8x8 + { + // x264 currently doesn't use sub-8x8 B partitions, so don't check for them + if( h->sps->b_direct8x8_inference ) + return 1; + for( i=0; i<4; i++ ) + if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) + return 0; + return 1; + } +} #endif diff --git a/encoder/cavlc.c b/encoder/cavlc.c index 345957f4..b6f4c9a2 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -116,9 +116,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * /* total/trailing */ if( i_idx == BLOCK_INDEX_CHROMA_DC ) - { bs_write_vlc( s, x264_coeff_token[4][i_total*4+i_trailing] ); - } else { /* x264_mb_predict_non_zero_code return 0 <-> (16+16+1)>>1 = 16 */ @@ -132,9 +130,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * i_suffix_length = i_total > 10 && i_trailing < 3 ? 1 : 0; if( i_trailing > 0 ) - { bs_write( s, i_trailing, i_sign ); - } for( i = i_trailing; i < i_total; i++ ) { int mask = level[i] >> 15; @@ -145,19 +141,13 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * i_level_code -= 2; /* as level[i] can't be 1 for the first one if i_trailing < 3 */ if( ( i_level_code >> i_suffix_length ) < 14 ) - { bs_write( s, (i_level_code >> i_suffix_length) + 1 + i_suffix_length, (1< 0 && ( i_level_code >> i_suffix_length ) == 14 ) - { bs_write( s, 15 + i_suffix_length, (1<mb.cache.intra4x4_pred_mode[x264_scan8[i]] ); - if( i_pred == i_mode) - { + if( i_pred == i_mode ) bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */ - } else - { - if( i_mode >= i_pred ) - i_mode--; - bs_write( s, 4, i_mode ); - } + bs_write( s, 4, i_mode - (i_mode > i_pred) ); } bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] ); } @@ -412,9 +396,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) bs_write_ue( s, 0 ); if( h->mb.pic.i_fref[0] > 1 ) - { bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); - } x264_mb_predict_mv( h, 0, 0, 4, mvp ); bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][0] - mvp[0] ); bs_write_se( s, h->mb.cache.mv[0][x264_scan8[0]][1] - mvp[1] ); @@ -457,9 +439,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) else if( i_mb_type == P_8x8 ) { int b_sub_ref0; - - if( h->mb.cache.ref[0][x264_scan8[0]] == 0 && h->mb.cache.ref[0][x264_scan8[4]] == 0 && - h->mb.cache.ref[0][x264_scan8[8]] == 0 && h->mb.cache.ref[0][x264_scan8[12]] == 0 ) + if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] | + h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 ) { bs_write_ue( s, 4 ); b_sub_ref0 = 0; @@ -469,11 +450,14 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) bs_write_ue( s, 3 ); b_sub_ref0 = 1; } + /* sub mb type */ - for( i = 0; i < 4; i++ ) - { - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] ); - } + if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 ) + for( i = 0; i < 4; i++ ) + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] ); + else + bs_write( s, 4, 0xf ); + /* ref0 */ if( h->mb.pic.i_fref[0] > 1 && b_sub_ref0 ) { @@ -492,24 +476,16 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) /* sub mb type */ for( i = 0; i < 4; i++ ) - { bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] ); - } + /* ref */ for( i = 0; i < 4; i++ ) - { if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) - { bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] ); - } - } for( i = 0; i < 4; i++ ) - { if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) - { bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] ); - } - } + /* mvd */ for( i = 0; i < 4; i++ ) cavlc_mb8x8_mvd( h, s, 0, i ); @@ -532,30 +508,27 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) b_list[1][i] = x264_mb_type_list1_table[i_mb_type][i]; } - bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] ); for( i_list = 0; i_list < 2; i_list++ ) { - const int i_ref_max = i_list == 0 ? h->mb.pic.i_fref[0] : h->mb.pic.i_fref[1]; + const int i_ref_max = (i_list == 0 ? h->mb.pic.i_fref[0] : h->mb.pic.i_fref[1]) - 1; - if( i_ref_max > 1 ) - { + if( i_ref_max ) switch( h->mb.i_partition ) { case D_16x16: - if( b_list[i_list][0] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[0]] ); + if( b_list[i_list][0] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[0]] ); break; case D_16x8: - if( b_list[i_list][0] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[0]] ); - if( b_list[i_list][1] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[8]] ); + if( b_list[i_list][0] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[0]] ); + if( b_list[i_list][1] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[8]] ); break; case D_8x16: - if( b_list[i_list][0] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[0]] ); - if( b_list[i_list][1] ) bs_write_te( s, i_ref_max - 1, h->mb.cache.ref[i_list][x264_scan8[4]] ); + if( b_list[i_list][0] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[0]] ); + if( b_list[i_list][1] ) bs_write_te( s, i_ref_max, h->mb.cache.ref[i_list][x264_scan8[4]] ); break; } - } } for( i_list = 0; i_list < 2; i_list++ ) { @@ -601,9 +574,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) } } else if( i_mb_type == B_DIRECT ) - { bs_write_ue( s, 0 ); - } else { x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" ); @@ -617,19 +588,13 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) /* Coded block patern */ if( i_mb_type == I_4x4 || i_mb_type == I_8x8 ) - { bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] ); - } else if( i_mb_type != I_16x16 ) - { bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] ); - } /* transform size 8x8 flag */ if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma ) - { bs_write1( s, h->mb.b_transform_8x8 ); - } /* write residual */ if( i_mb_type == I_16x16 ) @@ -640,19 +605,19 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) block_residual_write_cavlc( h, s, BLOCK_INDEX_LUMA_DC , h->dct.luma16x16_dc, 16 ); /* AC Luma */ - if( h->mb.i_cbp_luma != 0 ) + if( h->mb.i_cbp_luma ) for( i = 0; i < 16; i++ ) { h->mb.cache.non_zero_count[x264_scan8[i]] = array_non_zero_count( h->dct.luma4x4[i] ); block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 ); } } - else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 ) + else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma ) { cavlc_qp_delta( h, s ); x264_macroblock_luma_write_cavlc( h, s, 0, 3 ); } - if( h->mb.i_cbp_chroma != 0 ) + if( h->mb.i_cbp_chroma ) { /* Chroma DC residual present */ block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[0], 4 ); @@ -767,7 +732,7 @@ static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode ) static int x264_i8x8_chroma_size_cavlc( x264_t *h ) { h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] ); - if( h->mb.i_cbp_chroma != 0 ) + if( h->mb.i_cbp_chroma ) { block_residual_write_cavlc( h, &h->out.bs, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[0], 4 ); block_residual_write_cavlc( h, &h->out.bs, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 );