From: Loren Merritt Date: Fri, 6 Apr 2007 21:45:33 +0000 (+0000) Subject: faster cabac rdo. up to 10% faster at q0, but negligible at normal bitrates. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2f66c11a4eeb17950b3aee18cc105572e860ec44;p=libx264 faster cabac rdo. up to 10% faster at q0, but negligible at normal bitrates. git-svn-id: svn://svn.videolan.org/x264/trunk@647 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/encoder/cabac.c b/encoder/cabac.c index 7b022389..908a4600 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -783,12 +783,17 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl if( i_prefix != 0 ) { const int i_ctxIdxInc = 5 + X264_MIN( 4, i_abslevelgt1 ) + i_ctx_level; +#ifdef RDO_SKIP_BS + cb->f8_bits_encoded += cabac_prefix_size[i_prefix][cb->state[i_ctxIdxInc]]; + cb->state[i_ctxIdxInc] = cabac_prefix_transition[i_prefix][cb->state[i_ctxIdxInc]]; +#else int j; for( j = 0; j < i_prefix - 1; j++ ) x264_cabac_encode_decision( cb, i_ctxIdxInc, 1 ); if( i_prefix < 14 ) x264_cabac_encode_decision( cb, i_ctxIdxInc, 0 ); - else /* suffix */ +#endif + if( i_prefix >= 14 ) x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs_m1[i] - 14 ); i_abslevelgt1++; @@ -797,6 +802,9 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl i_abslevel1++; /* write sign */ +#ifdef RDO_SKIP_BS + if( i_prefix == 0 ) +#endif x264_cabac_encode_bypass( cb, i_coeff_sign[i] ); } } diff --git a/encoder/rdo.c b/encoder/rdo.c index e10702f5..b204639f 100644 --- a/encoder/rdo.c +++ b/encoder/rdo.c @@ -26,6 +26,9 @@ #define RDO_SKIP_BS +static int cabac_prefix_transition[15][128]; +static int cabac_prefix_size[15][128]; + /* CAVLC: produces exactly the same bit count as a normal encode */ /* this probably still leaves some unnecessary computations */ #define bs_write1(s,v) ((s)->i_bits_encoded += 1) @@ -213,8 +216,6 @@ int x264_rd_cost_i8x8_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct ) #define LAMBDA_BITS 4 /* precalculate the cost of coding abs_level_m1 */ -static int cabac_prefix_transition[15][128]; -static int cabac_prefix_size[15][128]; void x264_rdo_init( ) { int i_prefix; @@ -318,7 +319,7 @@ static void quant_trellis_cabac( x264_t *h, int16_t *dct, uint8_t cabac_state_last[64]; const int b_interlaced = h->mb.b_interlaced; const int f = 1 << 15; // no deadzone - int i_last_nnz = -1; + int i_last_nnz; int i, j; // (# of coefs) * (# of ctx) * (# of levels tried) = 1024 @@ -331,21 +332,25 @@ static void quant_trellis_cabac( x264_t *h, int16_t *dct, int i_levels_used = 1; /* init coefs */ - for( i = b_ac; i < i_coefs; i++ ) - { - int coef = dct[zigzag[i]]; - abs_coefs[i] = abs(coef); - signs[i] = coef < 0 ? -1 : 1; - if( f <= abs_coefs[i] * quant_mf[zigzag[i]] ) - i_last_nnz = i; - } + for( i = i_coefs-1; i >= b_ac; i-- ) + if( (unsigned)(dct[zigzag[i]] * quant_mf[zigzag[i]] + f-1) >= 2*f ) + break; - if( i_last_nnz == -1 ) + if( i < b_ac ) { memset( dct, 0, i_coefs * sizeof(*dct) ); return; } + i_last_nnz = i; + + for( ; i >= b_ac; i-- ) + { + int coef = dct[zigzag[i]]; + abs_coefs[i] = abs(coef); + signs[i] = coef < 0 ? -1 : 1; + } + /* init trellis */ for( i = 1; i < 8; i++ ) nodes_cur[i].score = TRELLIS_SCORE_MAX;