From: Loren Merritt Date: Tue, 25 Mar 2008 06:59:50 +0000 (-0600) Subject: faster residual X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=66a0c19d3659bbbc69decb88465f5957cf3611ef;p=libx264 faster residual --- diff --git a/encoder/cabac.c b/encoder/cabac.c index b47b4bb2..705ae155 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -717,7 +717,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl int i_last = 0; int i_sigmap_size; int node_ctx = 0; - int i; + int i, j; const int *significant_coeff_flag_offset; const int *last_coeff_flag_offset; @@ -730,17 +730,17 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl * 5-> Luma8x8 i_idx = luma8x8idx */ - for( i = 0; i < i_count; i++ ) - { + for( j = i_count - 4; j >= 0; j -= 4 ) + if( *(uint64_t*)(l+j) ) + break; + for( i = 0; i < j+4; i++ ) if( l[i] != 0 ) { - i_coeff_abs_m1[i_coeff] = abs( l[i] ) - 1; - i_coeff_sign[i_coeff] = ( l[i] < 0 ); + i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1; + i_coeff_sign[i_coeff] = l[i] < 0; i_coeff++; - i_last = i; } - } if( i_count != 64 ) { diff --git a/encoder/cavlc.c b/encoder/cavlc.c index e04ba5b2..e4e84cc8 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -70,17 +70,15 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * int i_total_zero; int i_last; unsigned int i_sign; - int i; - int i_zero_left; int i_suffix_length; /* first find i_last */ - i_last = i_count - 1; + for( i_last = i_count-1; i_last >= 3; i_last -= 4 ) + if( *(uint64_t*)(l+i_last-3) ) + break; while( i_last >= 0 && l[i_last] == 0 ) - { i_last--; - } i_sign = 0; i_total = 0; @@ -94,16 +92,11 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * /* level and run and total */ while( i_last >= 0 ) { - level[idx] = l[i_last--]; - - run[idx] = 0; - while( i_last >= 0 && l[i_last] == 0 ) - { - run[idx]++; - i_last--; - } - - idx++; + int r = 0; + level[idx] = l[i_last]; + while( --i_last >= 0 && l[i_last] == 0 ) + r++; + run[idx++] = r; } i_total = idx; @@ -112,7 +105,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * i_trailing = X264_MIN(3, idx); for( idx = 0; idx < i_trailing; idx++ ) { - if( abs(level[idx]) > 1 ) + if( (unsigned)(level[idx]+1) > 2 ) { i_trailing = idx; break; @@ -136,9 +129,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * } if( i_total <= 0 ) - { return; - } i_suffix_length = i_total > 10 && i_trailing < 3 ? 1 : 0; if( i_trailing > 0 ) @@ -147,29 +138,18 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * } for( i = i_trailing; i < i_total; i++ ) { - unsigned int i_level_code; + int mask = level[i] >> 15; + int abs_level = (level[i]^mask)-mask; + int i_level_code = abs_level*2-mask-2; - /* calculate level code */ - if( level[i] < 0 ) - { - i_level_code = -2*level[i] - 1; - } - else /* if( level[i] > 0 ) */ - { - i_level_code = 2 * level[i] - 2; - } if( i == i_trailing && i_trailing < 3 ) - { i_level_code -= 2; /* as level[i] can't be 1 for the first one if i_trailing < 3 */ - } if( ( i_level_code >> i_suffix_length ) < 14 ) { bs_write_vlc( s, x264_level_prefix[i_level_code >> i_suffix_length] ); if( i_suffix_length > 0 ) - { bs_write( s, i_suffix_length, i_level_code ); - } } else if( i_suffix_length == 0 && i_level_code < 30 ) { @@ -186,54 +166,31 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t * bs_write_vlc( s, x264_level_prefix[15] ); i_level_code -= 15 << i_suffix_length; if( i_suffix_length == 0 ) - { i_level_code -= 15; - } - if( i_level_code >= 1<<12 ) - { x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d\n", i_level_code ); - } - bs_write( s, 12, i_level_code ); } if( i_suffix_length == 0 ) - { i_suffix_length++; - } - if( abs( level[i] ) > ( 3 << ( i_suffix_length - 1 ) ) && i_suffix_length < 6 ) - { + if( abs_level > (3 << (i_suffix_length-1)) && i_suffix_length < 6 ) i_suffix_length++; - } } if( i_total < i_count ) { if( i_idx == BLOCK_INDEX_CHROMA_DC ) - { bs_write_vlc( s, x264_total_zeros_dc[i_total-1][i_total_zero] ); - } else - { bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] ); - } } - for( i = 0, i_zero_left = i_total_zero; i < i_total - 1; i++ ) + for( i = 0; i < i_total-1 && i_total_zero > 0; i++ ) { - int i_zl; - - if( i_zero_left <= 0 ) - { - break; - } - - i_zl = X264_MIN( i_zero_left - 1, 6 ); - + int i_zl = X264_MIN( i_total_zero - 1, 6 ); bs_write_vlc( s, x264_run_before[i_zl][run[i]] ); - - i_zero_left -= run[i]; + i_total_zero -= run[i]; } }