From: Loren Merritt Date: Sat, 21 Apr 2007 11:32:34 +0000 (+0000) Subject: cabac: use bytestream instead of bitstream. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b3076aef6c40f10260ef7386e3f2e028997da5d5;p=libx264 cabac: use bytestream instead of bitstream. 35% faster cabac, 20% faster overall lossless, ~1% faster overall at normal bitrates. git-svn-id: svn://svn.videolan.org/x264/trunk@651 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/common/cabac.c b/common/cabac.c index 87cdbe66..6d9d9ac0 100644 --- a/common/cabac.c +++ b/common/cabac.c @@ -739,6 +739,13 @@ static const uint8_t x264_cabac_transition[2][128] = 113,114,115,116,117,118,119,120,121,122,123,124,125,126,126,127, }}; +static const uint8_t renorm_shift[64]= { + 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + static const uint8_t x264_cabac_probability[128] = { FIX8(0.9812), FIX8(0.9802), FIX8(0.9792), FIX8(0.9781), @@ -835,124 +842,61 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int } } -/***************************************************************************** - * - *****************************************************************************/ -void x264_cabac_decode_init( x264_cabac_t *cb, bs_t *s ) -{ - cb->i_range = 0x01fe; - cb->i_low = bs_read( s, 9 ); - cb->s = s; -} - -static inline void x264_cabac_decode_renorm( x264_cabac_t *cb ) -{ - while( cb->i_range < 0x0100 ) - { - cb->i_range <<= 1; - cb->i_low = ( cb->i_low << 1 )|bs_read( cb->s, 1 ); - } -} - -int x264_cabac_decode_decision( x264_cabac_t *cb, int i_ctx ) -{ - int i_state = cb->state[i_ctx]; - int i_range_lps = x264_cabac_range_lps[i_state][(cb->i_range>>6)&0x03]; - - int val = (i_state >> 6); - - cb->i_range -= i_range_lps; - - if( cb->i_low >= cb->i_range ) - { - val ^= 1; - - cb->i_low -= cb->i_range; - cb->i_range= i_range_lps; - } - - cb->state[i_ctx] = 
x264_cabac_transition[val][i_state]; - - x264_cabac_decode_renorm( cb ); - - return val; -} -int x264_cabac_decode_bypass( x264_cabac_t *cb ) -{ - cb->i_low = (cb->i_low << 1)|bs_read( cb->s, 1 ); - - if( cb->i_low >= cb->i_range ) - { - cb->i_low -= cb->i_range; - return 1; - } - return 0; -} -int x264_cabac_decode_terminal( x264_cabac_t *cb ) -{ - if( cb->i_low >= cb->i_range - 2 ) - { - return 1; - } - - cb->i_range -= 2; - x264_cabac_decode_renorm( cb ); - return 0; -} - -/***************************************************************************** - * - *****************************************************************************/ -void x264_cabac_encode_init( x264_cabac_t *cb, bs_t *s ) +void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end ) { cb->i_low = 0; cb->i_range = 0x01FE; - cb->i_bits_outstanding = 0; - cb->s = s; - s->i_left++; // the first bit will be shifted away and not written + cb->i_queue = -1; // the first bit will be shifted away and not written + cb->i_bytes_outstanding = 0; + cb->p_start = p_data; + cb->p = p_data; + cb->p_end = p_end; } -static inline void x264_cabac_putbit( x264_cabac_t *cb, int b ) +static inline void x264_cabac_putbyte( x264_cabac_t *cb ) { - bs_write1( cb->s, b ); - - if( cb->i_bits_outstanding > 0 ) + if( cb->i_queue >= 8 ) { - while( cb->i_bits_outstanding > 32 ) - { - bs_write1( cb->s, 1-b ); - cb->i_bits_outstanding--; - } - bs_write( cb->s, cb->i_bits_outstanding, (1-b)*(~0) ); - cb->i_bits_outstanding = 0; - } -} + int out = cb->i_low >> (cb->i_queue+2); + cb->i_low &= (4<<cb->i_queue)-1; + cb->i_queue -= 8; -static inline void x264_cabac_encode_renorm( x264_cabac_t *cb ) -{ - /* RenormE */ - while( cb->i_range < 0x100 ) - { - if( cb->i_low < 0x100 ) - { - x264_cabac_putbit( cb, 0 ); - } - else if( cb->i_low >= 0x200 ) + if( (out & 0xff) == 0xff ) { - cb->i_low -= 0x200; - x264_cabac_putbit( cb, 1 ); + cb->i_bytes_outstanding++; } else { - cb->i_low -= 0x100; - cb->i_bits_outstanding++; +
if( cb->p + cb->i_bytes_outstanding + 1 >= cb->p_end ) + return; + int carry = out & 0x100; + if( carry ) + { + // this can't happen on the first byte (buffer underrun), + // because that would correspond to a probability > 1. + // this can't carry beyond the one byte, because any 0xff bytes + // are in bytes_outstanding and thus not written yet. + cb->p[-1]++; + } + while( cb->i_bytes_outstanding > 0 ) + { + *(cb->p++) = carry ? 0 : 0xff; + cb->i_bytes_outstanding--; + } + *(cb->p++) = out; } - - cb->i_range <<= 1; - cb->i_low <<= 1; } } +static inline void x264_cabac_encode_renorm( x264_cabac_t *cb ) +{ + int shift = renorm_shift[cb->i_range>>3]; + cb->i_range <<= shift; + cb->i_low <<= shift; + cb->i_queue += shift; + x264_cabac_putbyte( cb ); +} + void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx, int b ) { int i_state = cb->state[i_ctx]; @@ -975,17 +919,8 @@ void x264_cabac_encode_bypass( x264_cabac_t *cb, int b ) { cb->i_low <<= 1; cb->i_low += (((int32_t)b<<31)>>31) & cb->i_range; - - if( cb->i_low >= 0x400 || cb->i_low < 0x200 ) - { - x264_cabac_putbit( cb, cb->i_low >> 10 ); - cb->i_low &= 0x3ff; - } - else - { - cb->i_low -= 0x200; - cb->i_bits_outstanding++; - } + cb->i_queue += 1; + x264_cabac_putbyte( cb ); } void x264_cabac_encode_terminal( x264_cabac_t *cb, int b ) @@ -994,19 +929,34 @@ void x264_cabac_encode_terminal( x264_cabac_t *cb, int b ) if( b ) { cb->i_low += cb->i_range; - cb->i_range = 2; + cb->i_range = 2<<7; + cb->i_low <<= 7; + cb->i_queue += 7; + x264_cabac_putbyte( cb ); + } + else + { + x264_cabac_encode_renorm( cb ); } - x264_cabac_encode_renorm( cb ); } void x264_cabac_encode_flush( x264_cabac_t *cb ) { - x264_cabac_putbit( cb, (cb->i_low >> 9)&0x01 ); - bs_write1( cb->s, (cb->i_low >> 8)&0x01 ); + cb->i_low |= 0x80; + cb->i_low <<= 10; + cb->i_queue += 10; + x264_cabac_putbyte( cb ); + x264_cabac_putbyte( cb ); + cb->i_queue = 0; + + if( cb->p + cb->i_bytes_outstanding + 1 >= cb->p_end ) + return; //FIXME throw an error 
instead of silently truncating the frame - /* check that */ - bs_write1( cb->s, 0x01 ); - bs_align_0( cb->s ); + while( cb->i_bytes_outstanding > 0 ) + { + *(cb->p++) = 0xff; + cb->i_bytes_outstanding--; + } } /***************************************************************************** diff --git a/common/cabac.h b/common/cabac.h index 9009314d..5b598fef 100644 --- a/common/cabac.h +++ b/common/cabac.h @@ -34,23 +34,21 @@ typedef struct int i_range; /* bit stream */ - int i_bits_outstanding; + int i_queue; + int i_bytes_outstanding; int f8_bits_encoded; // only if using x264_cabac_size_decision() - bs_t *s; + + uint8_t *p_start; + uint8_t *p; + uint8_t *p_end; } x264_cabac_t; /* init the contexts given i_slice_type, the quantif and the model */ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model ); -/* decoder only (unused): */ -void x264_cabac_decode_init ( x264_cabac_t *cb, bs_t *s ); -int x264_cabac_decode_decision( x264_cabac_t *cb, int i_ctx_idx ); -int x264_cabac_decode_bypass ( x264_cabac_t *cb ); -int x264_cabac_decode_terminal( x264_cabac_t *cb ); - /* encoder only: */ -void x264_cabac_encode_init ( x264_cabac_t *cb, bs_t *s ); +void x264_cabac_encode_init ( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end ); void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx_idx, int b ); void x264_cabac_encode_bypass( x264_cabac_t *cb, int b ); void x264_cabac_encode_terminal( x264_cabac_t *cb, int b ); @@ -63,7 +61,7 @@ int x264_cabac_size_decision_noup( uint8_t *state, int b ); static inline int x264_cabac_pos( x264_cabac_t *cb ) { - return bs_pos( cb->s ) + cb->i_bits_outstanding; + return (cb->p - cb->p_start + cb->i_bytes_outstanding) * 8 + cb->i_queue; } #endif diff --git a/encoder/cabac.c b/encoder/cabac.c index 908a4600..dc7f6596 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -831,30 +831,27 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) #ifdef RDO_SKIP_BS cb->f8_bits_encoded += (384*8) 
<< 8; #else - bs_t *s = cb->s; - bs_align_0( s ); /* not sure */ + if( cb->p + 385 >= cb->p_end ) + return; //FIXME throw an error /* Luma */ - for( i = 0; i < 16*16; i++ ) + for( i = 0; i < 16; i++ ) { - const int x = 16 * h->mb.i_mb_x + (i % 16); - const int y = 16 * h->mb.i_mb_y + (i / 16); - bs_write( s, 8, h->fenc->plane[0][y*h->mb.pic.i_stride[0]+x] ); + memcpy( cb->p, h->fenc->plane[0] + i*h->mb.pic.i_stride[0], 16 ); + cb->p += 16; } /* Cb */ - for( i = 0; i < 8*8; i++ ) + for( i = 0; i < 8; i++ ) { - const int x = 8 * h->mb.i_mb_x + (i % 8); - const int y = 8 * h->mb.i_mb_y + (i / 8); - bs_write( s, 8, h->fenc->plane[1][y*h->mb.pic.i_stride[1]+x] ); + memcpy( cb->p, h->fenc->plane[1] + i*h->mb.pic.i_stride[1], 8 ); + cb->p += 8; } /* Cr */ - for( i = 0; i < 8*8; i++ ) + for( i = 0; i < 8; i++ ) { - const int x = 8 * h->mb.i_mb_x + (i % 8); - const int y = 8 * h->mb.i_mb_y + (i / 8); - bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] ); + memcpy( cb->p, h->fenc->plane[2] + i*h->mb.pic.i_stride[2], 8 ); + cb->p += 8; } - x264_cabac_encode_init( cb, s ); + x264_cabac_encode_init( cb, cb->p, cb->p_end ); #endif return; } diff --git a/encoder/encoder.c b/encoder/encoder.c index 181685a1..1c4ea839 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -1043,7 +1043,7 @@ static void x264_slice_write( x264_t *h ) /* init cabac */ x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc ); - x264_cabac_encode_init ( &h->cabac, &h->out.bs ); + x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end ); } h->mb.i_last_qp = h->sh.i_qp; h->mb.i_last_dqp = 0; @@ -1166,7 +1166,7 @@ static void x264_slice_write( x264_t *h ) if( h->param.b_cabac ) { x264_cabac_encode_flush( &h->cabac ); - + h->out.bs.p = h->cabac.p; } else {