From: Fiona Glaser Date: Sat, 20 Jun 2009 01:49:55 +0000 (-0700) Subject: Various CABAC optimizations and cleanups X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=90bec46ba524c3e1a4facaeb3ea21b9ef08e614b;p=libx264 Various CABAC optimizations and cleanups Faster CABAC CBF context calculation for inter blocks. Add x264_constant_p(), will probably be useful in the future as well. Simpler subpartition functions. Clean up and optimize mvd_cpn a bit more. Various other minor optimizations. --- diff --git a/common/osdep.h b/common/osdep.h index 168d6b25..915ec05e 100644 --- a/common/osdep.h +++ b/common/osdep.h @@ -78,10 +78,12 @@ #define UNUSED __attribute__((unused)) #define ALWAYS_INLINE __attribute__((always_inline)) inline #define NOINLINE __attribute__((noinline)) +#define x264_constant_p(x) __builtin_constant_p(x) #else #define UNUSED #define ALWAYS_INLINE inline #define NOINLINE +#define x264_constant_p(x) 0 #endif /* threads */ diff --git a/encoder/cabac.c b/encoder/cabac.c index 9bc37148..f51149f0 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -36,11 +36,13 @@ static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_ { x264_cabac_encode_decision_noup( cb, ctx0, 0 ); } +#if !RDO_SKIP_BS else if( i_mb_type == I_PCM ) { x264_cabac_encode_decision_noup( cb, ctx0, 1 ); x264_cabac_encode_flush( h, cb ); } +#endif else { int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode]; @@ -129,10 +131,14 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb ) ctx++; if( i_mb_type == B_DIRECT ) + { x264_cabac_encode_decision_noup( cb, 27+ctx, 0 ); - else if( i_mb_type == B_8x8 ) + return; + } + x264_cabac_encode_decision_noup( cb, 27+ctx, 1 ); + + if( i_mb_type == B_8x8 ) { - x264_cabac_encode_decision_noup( cb, 27+ctx, 1 ); x264_cabac_encode_decision_noup( cb, 27+3, 1 ); x264_cabac_encode_decision_noup( cb, 27+4, 1 ); x264_cabac_encode_decision( cb, 27+5, 1 ); @@ -142,7 +148,6 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb ) else if( IS_INTRA( i_mb_type ) ) { /* prefix */ - x264_cabac_encode_decision_noup( cb, 27+ctx, 1 ); x264_cabac_encode_decision_noup( cb, 27+3, 1 ); x264_cabac_encode_decision_noup( cb, 27+4, 1 ); x264_cabac_encode_decision( cb, 27+5, 1 ); @@ -154,39 +159,31 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb ) } else { - static const int i_mb_len[9*3] = - { - 6, 6, 3, /* L0 L0 */ - 6, 6, 0, /* L0 L1 */ - 7, 7, 0, /* L0 BI */ - 6, 6, 0, /* L1 L0 */ - 6, 6, 3, /* L1 L1 */ - 7, 7, 0, /* L1 BI */ - 7, 7, 0, /* BI L0 */ - 7, 7, 0, /* BI L1 */ - 7, 7, 6, /* BI BI */ - }; - static const int i_mb_bits[9*3][7] = + static const uint8_t i_mb_bits[9*3][6] = { - { 1,1,0,0,0,1 }, { 1,1,0,0,1,0, }, { 1,0,0 }, /* L0 L0 */ - { 1,1,0,1,0,1 }, { 1,1,0,1,1,0 }, {0}, /* L0 L1 */ - { 1,1,1,0,0,0,0 }, { 1,1,1,0,0,0,1 }, {0}, /* L0 BI */ - { 1,1,0,1,1,1 }, { 1,1,1,1,1,0 }, {0}, /* L1 L0 */ - { 1,1,0,0,1,1 }, { 1,1,0,1,0,0 }, { 1,0,1 }, /* L1 L1 */ - { 1,1,1,0,0,1,0 }, { 1,1,1,0,0,1,1 }, {0}, /* L1 BI */ - { 1,1,1,0,1,0,0 }, { 1,1,1,0,1,0,1 }, {0}, /* BI L0 */ - { 1,1,1,0,1,1,0 }, { 1,1,1,0,1,1,1 }, {0}, /* BI L1 */ - { 1,1,1,1,0,0,0 }, { 1,1,1,1,0,0,1 }, { 1,1,0,0,0,0 }, /* BI BI */ + { 1,0,0,0,1,2 }, { 1,0,0,1,0,2 }, { 0,0,2,2,2,2 }, /* L0 L0 */ + { 1,0,1,0,1,2 }, { 1,0,1,1,0,2 }, {0}, /* L0 L1 */ + { 1,1,0,0,0,0 }, { 1,1,0,0,0,1 }, {0}, /* L0 BI */ + { 1,0,1,1,1,2 }, { 1,1,1,1,0,2 }, {0}, /* L1 L0 */ + { 1,0,0,1,1,2 }, { 1,0,1,0,0,2 }, { 0,1,2,2,2,2 }, /* L1 L1 */ + { 1,1,0,0,1,0 }, { 1,1,0,0,1,1 }, {0}, /* L1 BI */ + { 1,1,0,1,0,0 }, { 1,1,0,1,0,1 }, {0}, /* BI L0 */ + { 1,1,0,1,1,0 }, { 1,1,0,1,1,1 }, {0}, /* BI L1 */ + { 1,1,1,0,0,0 }, { 1,1,1,0,0,1 }, { 1,0,0,0,0,2 }, /* BI BI */ }; const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8); - int i; - x264_cabac_encode_decision_noup( cb, 27+ctx, i_mb_bits[idx][0] ); - x264_cabac_encode_decision_noup( cb, 27+3, i_mb_bits[idx][1] ); - x264_cabac_encode_decision( cb, 27+5-i_mb_bits[idx][1], i_mb_bits[idx][2] ); - for( i = 3; i < i_mb_len[idx]; i++ ) - x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][i] ); + x264_cabac_encode_decision_noup( cb, 27+3, i_mb_bits[idx][0] ); + x264_cabac_encode_decision( cb, 27+5-i_mb_bits[idx][0], i_mb_bits[idx][1] ); + if( i_mb_bits[idx][2] != 2 ) + { + x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][2] ); + x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][3] ); + x264_cabac_encode_decision( cb, 27+5, i_mb_bits[idx][4] ); + if( i_mb_bits[idx][5] != 2 ) + x264_cabac_encode_decision_noup( cb, 27+5, i_mb_bits[idx][5] ); + } } } } @@ -305,61 +302,38 @@ void x264_cabac_mb_skip( x264_t *h, int b_skip ) static inline void x264_cabac_mb_sub_p_partition( x264_cabac_t *cb, int i_sub ) { if( i_sub == D_L0_8x8 ) - x264_cabac_encode_decision( cb, 21, 1 ); - else if( i_sub == D_L0_8x4 ) { - x264_cabac_encode_decision( cb, 21, 0 ); - x264_cabac_encode_decision( cb, 22, 0 ); - } - else if( i_sub == D_L0_4x8 ) - { - x264_cabac_encode_decision( cb, 21, 0 ); - x264_cabac_encode_decision( cb, 22, 1 ); - x264_cabac_encode_decision( cb, 23, 1 ); + x264_cabac_encode_decision( cb, 21, 1 ); + return; } - else if( i_sub == D_L0_4x4 ) + x264_cabac_encode_decision( cb, 21, 0 ); + if( i_sub == D_L0_8x4 ) + x264_cabac_encode_decision( cb, 22, 0 ); + else { - x264_cabac_encode_decision( cb, 21, 0 ); x264_cabac_encode_decision( cb, 22, 1 ); - x264_cabac_encode_decision( cb, 23, 0 ); + x264_cabac_encode_decision( cb, 23, i_sub == D_L0_4x8 ); } } -static NOINLINE void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub ) +static inline void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub ) { - static const uint8_t part_bits[12][7] = { - {6,1,1,1,0,1,1}, // D_L0_4x4 - {5,1,1,0,0,1}, // D_L0_8x4 - {5,1,1,0,1,0}, // D_L0_4x8 - {3,1,0,0}, // D_L0_8x8 - {5,1,1,1,1,0}, // D_L1_4x4 - {5,1,1,0,1,1}, // D_L1_8x4 - {6,1,1,1,0,0,0}, // D_L1_4x8 - {3,1,0,1}, // D_L1_8x8 - {5,1,1,1,1,1}, // D_BI_4x4 - {6,1,1,1,0,0,1}, // D_BI_8x4 - {6,1,1,1,0,1,0}, // D_BI_4x8 - {5,1,1,0,0,0}, // D_BI_8x8 - }; - int len; if( i_sub == D_DIRECT_8x8 ) { x264_cabac_encode_decision( cb, 36, 0 ); return; } - len = part_bits[i_sub][0]; - x264_cabac_encode_decision( cb, 36, part_bits[i_sub][1] ); - x264_cabac_encode_decision( cb, 37, part_bits[i_sub][2] ); - if( len == 3 ) - x264_cabac_encode_decision( cb, 39, part_bits[i_sub][3] ); - else + x264_cabac_encode_decision( cb, 36, 1 ); + if( i_sub == D_BI_8x8 ) { - x264_cabac_encode_decision( cb, 38, part_bits[i_sub][3] ); - x264_cabac_encode_decision( cb, 39, part_bits[i_sub][4] ); - x264_cabac_encode_decision( cb, 39, part_bits[i_sub][5] ); - if( len == 6 ) - x264_cabac_encode_decision( cb, 39, part_bits[i_sub][6] ); + x264_cabac_encode_decision( cb, 37, 1 ); + x264_cabac_encode_decision( cb, 38, 0 ); + x264_cabac_encode_decision( cb, 39, 0 ); + x264_cabac_encode_decision( cb, 39, 0 ); + return; } + x264_cabac_encode_decision( cb, 37, 0 ); + x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 ); } static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb ) @@ -376,9 +350,9 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx int i_ref = h->mb.cache.ref[i_list][i8]; int ctx = 0; - if( i_refa > 0 && !h->mb.cache.skip[i8 - 1]) + if( i_refa > 0 && !h->mb.cache.skip[i8 - 1] ) ctx++; - if( i_refb > 0 && !h->mb.cache.skip[i8 - 8]) + if( i_refb > 0 && !h->mb.cache.skip[i8 - 8] ) ctx += 2; while( i_ref > 0 ) @@ -392,52 +366,66 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx ) { - static const uint8_t ctxes[9] = { 0,3,4,5,6,6,6,6,6 }; const int i_abs = abs( mvd ); const int ctxbase = l ? 47 : 40; int i; - +#if RDO_SKIP_BS if( i_abs == 0 ) x264_cabac_encode_decision( cb, ctxbase + ctx, 0 ); else if( i_abs < 9 ) { x264_cabac_encode_decision( cb, ctxbase + ctx, 1 ); -#if RDO_SKIP_BS if( i_abs > 4 ) { - for( i = 1; i < 4; i++ ) - x264_cabac_encode_decision( cb, ctxbase + ctxes[i], 1 ); + x264_cabac_encode_decision( cb, ctxbase + 3, 1 ); + x264_cabac_encode_decision( cb, ctxbase + 4, 1 ); + x264_cabac_encode_decision( cb, ctxbase + 5, 1 ); cb->f8_bits_encoded += cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]]; cb->state[ctxbase+6] = cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]]; } else -#endif { for( i = 1; i < i_abs; i++ ) - x264_cabac_encode_decision( cb, ctxbase + ctxes[i], 1 ); - x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs], 0 ); + x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 ); + x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 ); x264_cabac_encode_bypass( cb, mvd < 0 ); } } else { x264_cabac_encode_decision( cb, ctxbase + ctx, 1 ); -#if RDO_SKIP_BS - for( i = 1; i < 4; i++ ) - x264_cabac_encode_decision( cb, ctxbase + ctxes[i], 1 ); + x264_cabac_encode_decision( cb, ctxbase + 3, 1 ); + x264_cabac_encode_decision( cb, ctxbase + 4, 1 ); + x264_cabac_encode_decision( cb, ctxbase + 5, 1 ); cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]]; cb->state[ctxbase+6] = cabac_transition_5ones[cb->state[ctxbase+6]]; x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 ); + } #else + static const uint8_t ctxes[8] = { 3,4,5,6,6,6,6,6 }; + + if( i_abs == 0 ) + x264_cabac_encode_decision( cb, ctxbase + ctx, 0 ); + else if( i_abs < 9 ) + { + x264_cabac_encode_decision( cb, ctxbase + ctx, 1 ); + for( i = 1; i < i_abs; i++ ) + x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 ); + x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs-1], 0 ); + x264_cabac_encode_bypass( cb, mvd < 0 ); + } + else + { + x264_cabac_encode_decision( cb, ctxbase + ctx, 1 ); for( i = 1; i < 9; i++ ) - x264_cabac_encode_decision( cb, ctxbase + ctxes[i], 1 ); + x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 ); x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 ); x264_cabac_encode_bypass( cb, mvd < 0 ); -#endif } +#endif } -static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width, int height ) +static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width ) { DECLARE_ALIGNED_4( int16_t mvp[2] ); uint32_t amvd; @@ -459,7 +447,7 @@ static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_l #define x264_cabac_mb_mvd(h,cb,i_list,idx,width,height)\ {\ - uint32_t mvd = x264_cabac_mb_mvd(h,cb,i_list,idx,width,height);\ + uint32_t mvd = x264_cabac_mb_mvd(h,cb,i_list,idx,width);\ x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\ } @@ -519,9 +507,14 @@ static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int /* no need to test for skip/pcm */ i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1]; i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8]; - i_nza &= 0x7f + (b_intra << 7); - i_nzb &= 0x7f + (b_intra << 7); - return 85 + 4*i_cat + 2*!!i_nzb + !!i_nza; + if( x264_constant_p(b_intra) && !b_intra ) + return 85 + 4*i_cat + ((2*i_nzb + i_nza)&0x7f); + else + { + i_nza &= 0x7f + (b_intra << 7); + i_nzb &= 0x7f + (b_intra << 7); + return 85 + 4*i_cat + 2*!!i_nzb + !!i_nza; + } case DCT_LUMA_DC: i_nza = (h->mb.cache.i_cbp_left >> 8) & 1; i_nzb = (h->mb.cache.i_cbp_top >> 8) & 1; @@ -875,10 +868,8 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) else if( i_mb_type == P_8x8 ) { /* sub mb type */ - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[0] ); - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[1] ); - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[2] ); - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[3] ); + for( i = 0; i < 4; i++ ) + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] ); /* ref 0 */ if( h->mb.pic.i_fref[0] > 1 ) @@ -895,15 +886,13 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) else if( i_mb_type == B_8x8 ) { /* sub mb type */ - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[0] ); - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[1] ); - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[2] ); - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[3] ); + for( i = 0; i < 4; i++ ) + x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] ); /* ref */ for( i_list = 0; i_list < 2; i_list++ ) { - if( ( i_list ? h->mb.pic.i_fref[1] : h->mb.pic.i_fref[0] ) == 1 ) + if( h->mb.pic.i_fref[i_list] == 1 ) continue; for( i = 0; i < 4; i++ ) if( x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) diff --git a/encoder/rdo.c b/encoder/rdo.c index 6d097a61..43134d6f 100644 --- a/encoder/rdo.c +++ b/encoder/rdo.c @@ -52,10 +52,9 @@ static uint16_t cabac_size_5ones[128]; #undef x264_cabac_encode_decision_noup #define x264_cabac_encode_decision(c,x,v) x264_cabac_size_decision(c,x,v) #define x264_cabac_encode_decision_noup(c,x,v) x264_cabac_size_decision_noup(c,x,v) -#define x264_cabac_encode_terminal(c) x264_cabac_size_decision_noup(c,276,0) +#define x264_cabac_encode_terminal(c) ((c)->f8_bits_encoded += 7) #define x264_cabac_encode_bypass(c,v) ((c)->f8_bits_encoded += 256) #define x264_cabac_encode_ue_bypass(c,e,v) ((c)->f8_bits_encoded += (bs_size_ue_big(v+(1<