granicus.if.org Git - libx264/commitdiff
cabac: use bytestream instead of bitstream.
author: Loren Merritt <pengvado@videolan.org>
Sat, 21 Apr 2007 11:32:34 +0000 (11:32 +0000)
committer: Loren Merritt <pengvado@videolan.org>
Sat, 21 Apr 2007 11:32:34 +0000 (11:32 +0000)
35% faster cabac, 20% faster overall lossless, ~1% faster overall at normal bitrates.

git-svn-id: svn://svn.videolan.org/x264/trunk@651 df754926-b1dd-0310-bc7b-ec298dee348c

common/cabac.c
common/cabac.h
encoder/cabac.c
encoder/encoder.c

index 87cdbe6670eaea3272116a92b5787d9509d58b9f..6d9d9ac0e80d9071d0f8c2e316adf00a16d45fe8 100644 (file)
@@ -739,6 +739,13 @@ static const uint8_t x264_cabac_transition[2][128] =
     113,114,115,116,117,118,119,120,121,122,123,124,125,126,126,127,
 }};
 
+static const uint8_t renorm_shift[64]= {
+ 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+
 static const uint8_t x264_cabac_probability[128] =
 {
     FIX8(0.9812), FIX8(0.9802), FIX8(0.9792), FIX8(0.9781),
@@ -835,124 +842,61 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
     }
 }
 
-/*****************************************************************************
- *
- *****************************************************************************/
-void x264_cabac_decode_init( x264_cabac_t *cb, bs_t *s )
-{
-    cb->i_range = 0x01fe;
-    cb->i_low   = bs_read( s, 9 );
-    cb->s       = s;
-}
-
-static inline void x264_cabac_decode_renorm( x264_cabac_t *cb )
-{
-    while( cb->i_range < 0x0100 )
-    {
-        cb->i_range <<= 1;
-        cb->i_low   = ( cb->i_low << 1 )|bs_read( cb->s, 1 );
-    }
-}
-
-int  x264_cabac_decode_decision( x264_cabac_t *cb, int i_ctx )
-{
-    int i_state = cb->state[i_ctx];
-    int i_range_lps = x264_cabac_range_lps[i_state][(cb->i_range>>6)&0x03];
-
-    int val = (i_state >> 6);
-
-    cb->i_range -= i_range_lps;
-
-    if( cb->i_low >= cb->i_range )
-    {
-        val ^= 1;
-
-        cb->i_low -= cb->i_range;
-        cb->i_range= i_range_lps;
-    }
-
-    cb->state[i_ctx] = x264_cabac_transition[val][i_state];
-
-    x264_cabac_decode_renorm( cb );
-
-    return val;
-}
-int  x264_cabac_decode_bypass( x264_cabac_t *cb )
-{
-    cb->i_low = (cb->i_low << 1)|bs_read( cb->s, 1 );
-
-    if( cb->i_low >= cb->i_range )
-    {
-        cb->i_low -= cb->i_range;
-        return 1;
-    }
-    return 0;
-}
-int  x264_cabac_decode_terminal( x264_cabac_t *cb )
-{
-    if( cb->i_low >= cb->i_range - 2 )
-    {
-        return 1;
-    }
-
-    cb->i_range -= 2;
-    x264_cabac_decode_renorm( cb );
-    return 0;
-}
-
-/*****************************************************************************
- *
- *****************************************************************************/
-void x264_cabac_encode_init( x264_cabac_t *cb, bs_t *s )
+void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
 {
     cb->i_low   = 0;
     cb->i_range = 0x01FE;
-    cb->i_bits_outstanding = 0;
-    cb->s = s;
-    s->i_left++; // the first bit will be shifted away and not written
+    cb->i_queue = -1; // the first bit will be shifted away and not written
+    cb->i_bytes_outstanding = 0;
+    cb->p_start = p_data;
+    cb->p       = p_data;
+    cb->p_end   = p_end;
 }
 
-static inline void x264_cabac_putbit( x264_cabac_t *cb, int b )
+static inline void x264_cabac_putbyte( x264_cabac_t *cb )
 {
-    bs_write1( cb->s, b );
-
-    if( cb->i_bits_outstanding > 0 )
+    if( cb->i_queue >= 8 )
     {
-        while( cb->i_bits_outstanding > 32 )
-        {
-            bs_write1( cb->s, 1-b );
-            cb->i_bits_outstanding--;
-        }
-        bs_write( cb->s, cb->i_bits_outstanding, (1-b)*(~0) );
-        cb->i_bits_outstanding = 0;
-    }
-}
+        int out = cb->i_low >> (cb->i_queue+2);
+        cb->i_low &= (4<<cb->i_queue)-1;
+        cb->i_queue -= 8;
 
-static inline void x264_cabac_encode_renorm( x264_cabac_t *cb )
-{
-    /* RenormE */
-    while( cb->i_range < 0x100 )
-    {
-        if( cb->i_low < 0x100 )
-        {
-            x264_cabac_putbit( cb, 0 );
-        }
-        else if( cb->i_low >= 0x200 )
+        if( (out & 0xff) == 0xff )
         {
-            cb->i_low -= 0x200;
-            x264_cabac_putbit( cb, 1 );
+            cb->i_bytes_outstanding++;
         }
         else
         {
-            cb->i_low -= 0x100;
-            cb->i_bits_outstanding++;
+            if( cb->p + cb->i_bytes_outstanding + 1 >= cb->p_end )
+                return;
+            int carry = out & 0x100;
+            if( carry )
+            {
+                // this can't happen on the first byte (buffer underrun),
+                // because that would correspond to a probability > 1.
+                // this can't carry beyond the one byte, because any 0xff bytes
+                // are in bytes_outstanding and thus not written yet.
+                cb->p[-1]++;
+            }
+            while( cb->i_bytes_outstanding > 0 )
+            {
+                *(cb->p++) = carry ? 0 : 0xff;
+                cb->i_bytes_outstanding--;
+            }
+            *(cb->p++) = out;
         }
-
-        cb->i_range <<= 1;
-        cb->i_low   <<= 1;
     }
 }
 
+static inline void x264_cabac_encode_renorm( x264_cabac_t *cb )
+{
+    int shift = renorm_shift[cb->i_range>>3];
+    cb->i_range <<= shift;
+    cb->i_low   <<= shift;
+    cb->i_queue  += shift;
+    x264_cabac_putbyte( cb );
+}
+
 void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx, int b )
 {
     int i_state = cb->state[i_ctx];
@@ -975,17 +919,8 @@ void x264_cabac_encode_bypass( x264_cabac_t *cb, int b )
 {
     cb->i_low <<= 1;
     cb->i_low += (((int32_t)b<<31)>>31) & cb->i_range;
-
-    if( cb->i_low >= 0x400 || cb->i_low < 0x200 )
-    {
-        x264_cabac_putbit( cb, cb->i_low >> 10 );
-        cb->i_low &= 0x3ff;
-    }
-    else
-    {
-        cb->i_low -= 0x200;
-        cb->i_bits_outstanding++;
-    }
+    cb->i_queue += 1;
+    x264_cabac_putbyte( cb );
 }
 
 void x264_cabac_encode_terminal( x264_cabac_t *cb, int b )
@@ -994,19 +929,34 @@ void x264_cabac_encode_terminal( x264_cabac_t *cb, int b )
     if( b )
     {
         cb->i_low += cb->i_range;
-        cb->i_range = 2;
+        cb->i_range  = 2<<7;
+        cb->i_low  <<= 7;
+        cb->i_queue += 7;
+        x264_cabac_putbyte( cb );
+    }
+    else
+    {
+        x264_cabac_encode_renorm( cb );
     }
-    x264_cabac_encode_renorm( cb );
 }
 
 void x264_cabac_encode_flush( x264_cabac_t *cb )
 {
-    x264_cabac_putbit( cb, (cb->i_low >> 9)&0x01 );
-    bs_write1( cb->s, (cb->i_low >> 8)&0x01 );
+    cb->i_low |= 0x80;
+    cb->i_low <<= 10;
+    cb->i_queue += 10;
+    x264_cabac_putbyte( cb );
+    x264_cabac_putbyte( cb );
+    cb->i_queue = 0;
+
+    if( cb->p + cb->i_bytes_outstanding + 1 >= cb->p_end )
+        return; //FIXME throw an error instead of silently truncating the frame
 
-    /* check that */
-    bs_write1( cb->s, 0x01 );
-    bs_align_0( cb->s );
+    while( cb->i_bytes_outstanding > 0 )
+    {
+        *(cb->p++) = 0xff;
+        cb->i_bytes_outstanding--;
+    }
 }
 
 /*****************************************************************************
index 9009314d5eafa8f3d639c56f014e8aa2859e3e74..5b598fef92924ba2dea44e42695e457eb917e980 100644 (file)
@@ -34,23 +34,21 @@ typedef struct
     int i_range;
 
     /* bit stream */
-    int i_bits_outstanding;
+    int i_queue;
+    int i_bytes_outstanding;
     int f8_bits_encoded; // only if using x264_cabac_size_decision()
-    bs_t *s;
+
+    uint8_t *p_start;
+    uint8_t *p;
+    uint8_t *p_end;
 
 } x264_cabac_t;
 
 /* init the contexts given i_slice_type, the quantif and the model */
 void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
 
-/* decoder only (unused): */
-void x264_cabac_decode_init    ( x264_cabac_t *cb, bs_t *s );
-int  x264_cabac_decode_decision( x264_cabac_t *cb, int i_ctx_idx );
-int  x264_cabac_decode_bypass  ( x264_cabac_t *cb );
-int  x264_cabac_decode_terminal( x264_cabac_t *cb );
-
 /* encoder only: */
-void x264_cabac_encode_init ( x264_cabac_t *cb, bs_t *s );
+void x264_cabac_encode_init ( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
 void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx_idx, int b );
 void x264_cabac_encode_bypass( x264_cabac_t *cb, int b );
 void x264_cabac_encode_terminal( x264_cabac_t *cb, int b );
@@ -63,7 +61,7 @@ int  x264_cabac_size_decision_noup( uint8_t *state, int b );
 
 static inline int x264_cabac_pos( x264_cabac_t *cb )
 {
-    return bs_pos( cb->s ) + cb->i_bits_outstanding;
+    return (cb->p - cb->p_start + cb->i_bytes_outstanding) * 8 + cb->i_queue;
 }
 
 #endif
index 908a4600997b8661edcad20f62b242399232992d..dc7f6596f67d3d319d9c0440514e5b347e4401f2 100644 (file)
@@ -831,30 +831,27 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
 #ifdef RDO_SKIP_BS
         cb->f8_bits_encoded += (384*8) << 8;
 #else
-        bs_t *s = cb->s;
-        bs_align_0( s );    /* not sure */
+        if( cb->p + 385 >= cb->p_end )
+            return; //FIXME throw an error
         /* Luma */
-        for( i = 0; i < 16*16; i++ )
+        for( i = 0; i < 16; i++ )
         {
-            const int x = 16 * h->mb.i_mb_x + (i % 16);
-            const int y = 16 * h->mb.i_mb_y + (i / 16);
-            bs_write( s, 8, h->fenc->plane[0][y*h->mb.pic.i_stride[0]+x] );
+            memcpy( cb->p, h->fenc->plane[0] + i*h->mb.pic.i_stride[0], 16 );
+            cb->p += 16;
         }
         /* Cb */
-        for( i = 0; i < 8*8; i++ )
+        for( i = 0; i < 8; i++ )
         {
-            const int x = 8 * h->mb.i_mb_x + (i % 8);
-            const int y = 8 * h->mb.i_mb_y + (i / 8);
-            bs_write( s, 8, h->fenc->plane[1][y*h->mb.pic.i_stride[1]+x] );
+            memcpy( cb->p, h->fenc->plane[1] + i*h->mb.pic.i_stride[1], 8 );
+            cb->p += 8;
         }
         /* Cr */
-        for( i = 0; i < 8*8; i++ )
+        for( i = 0; i < 8; i++ )
         {
-            const int x = 8 * h->mb.i_mb_x + (i % 8);
-            const int y = 8 * h->mb.i_mb_y + (i / 8);
-            bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] );
+            memcpy( cb->p, h->fenc->plane[2] + i*h->mb.pic.i_stride[2], 8 );
+            cb->p += 8;
         }
-        x264_cabac_encode_init( cb, s );
+        x264_cabac_encode_init( cb, cb->p, cb->p_end );
 #endif
         return;
     }
index 181685a11858544895363999263090c835cef8e2..1c4ea8398c5acd166d6ef01b9b3ef034dbb4ca99 100644 (file)
@@ -1043,7 +1043,7 @@ static void x264_slice_write( x264_t *h )
 
         /* init cabac */
         x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc );
-        x264_cabac_encode_init ( &h->cabac, &h->out.bs );
+        x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
     }
     h->mb.i_last_qp = h->sh.i_qp;
     h->mb.i_last_dqp = 0;
@@ -1166,7 +1166,7 @@ static void x264_slice_write( x264_t *h )
     if( h->param.b_cabac )
     {
         x264_cabac_encode_flush( &h->cabac );
-
+        h->out.bs.p = h->cabac.p;
     }
     else
     {