granicus.if.org Git - libx264/commitdiff
Faster CABAC residual encoding
author Fiona Glaser <fiona@x264.com>
Fri, 7 Nov 2008 19:39:43 +0000 (11:39 -0800)
committer Fiona Glaser <fiona@x264.com>
Fri, 7 Nov 2008 19:39:43 +0000 (11:39 -0800)
6% faster block_residual_write_cabac in RD mode.

encoder/cabac.c
encoder/cavlc.c
encoder/rdo.c

index c4124cc3af915f69f00ef9dd7c77adbe049afc20..3dbc8fc5ddb1cba8c2d863a7714e2384990dc591 100644 (file)
 #include "common/common.h"
 #include "macroblock.h"
 
+#ifndef RDO_SKIP_BS
+#define RDO_SKIP_BS 0
+#endif
+
 static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
                     int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
 {
@@ -304,7 +308,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
     /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
     if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
     {
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
         h->mb.i_qp = h->mb.i_last_qp;
 #endif
         i_dqp = 0;
@@ -335,7 +339,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
     x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 );
 }
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
 void x264_cabac_mb_skip( x264_t *h, int b_skip )
 {
     int ctx = (h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left ))
@@ -454,7 +458,7 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
     else if( i_abs < 9 )
     {
         x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
-#ifdef RDO_SKIP_BS
+#if RDO_SKIP_BS
         if( i_abs > 4 )
         {
             for( i = 1; i < 4; i++ )
@@ -474,7 +478,7 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
     else
     {
         x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
-#ifdef RDO_SKIP_BS
+#if RDO_SKIP_BS
         for( i = 1; i < 4; i++ )
             x264_cabac_encode_decision( cb, ctxbase + ctxes[i], 1 );
         cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]];
@@ -557,68 +561,67 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
     int i_nza = 0;
     int i_nzb = 0;
 
-    if( i_cat == DCT_LUMA_DC )
-    {
-        if( h->mb.i_neighbour & MB_LEFT )
-        {
-            i_mba_xy = h->mb.i_mb_xy - 1;
-            i_nza = h->mb.cbp[i_mba_xy] & 0x100;
-        }
-        if( h->mb.i_neighbour & MB_TOP )
-        {
-            i_mbb_xy = h->mb.i_mb_top_xy;
-            i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
-        }
-    }
-    else if( i_cat == DCT_LUMA_AC || i_cat == DCT_LUMA_4x4 )
-    {
-        if( i_idx & ~10 ) // block_idx_x > 0
-            i_mba_xy = h->mb.i_mb_xy;
-        else if( h->mb.i_neighbour & MB_LEFT )
-            i_mba_xy = h->mb.i_mb_xy - 1;
-
-        if( i_idx & ~5 ) // block_idx_y > 0
-            i_mbb_xy = h->mb.i_mb_xy;
-        else if( h->mb.i_neighbour & MB_TOP )
-            i_mbb_xy = h->mb.i_mb_top_xy;
-
-        /* no need to test for skip/pcm */
-        if( i_mba_xy >= 0 )
-            i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
-        if( i_mbb_xy >= 0 )
-            i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
-    }
-    else if( i_cat == DCT_CHROMA_DC )
-    {
-        /* no need to test skip/pcm */
-        if( h->mb.i_neighbour & MB_LEFT )
-        {
-            i_mba_xy = h->mb.i_mb_xy - 1;
-            i_nza = h->mb.cbp[i_mba_xy] & (0x200 << i_idx);
-        }
-        if( h->mb.i_neighbour & MB_TOP )
-        {
-            i_mbb_xy = h->mb.i_mb_top_xy;
-            i_nzb = h->mb.cbp[i_mbb_xy] & (0x200 << i_idx);
-        }
-    }
-    else if( i_cat == DCT_CHROMA_AC )
+    switch( i_cat )
     {
-        if( i_idx & 1 )
-            i_mba_xy = h->mb.i_mb_xy;
-        else if( h->mb.i_neighbour & MB_LEFT )
-            i_mba_xy = h->mb.i_mb_xy - 1;
-
-        if( i_idx & 2 )
-            i_mbb_xy = h->mb.i_mb_xy;
-        else if( h->mb.i_neighbour & MB_TOP )
-            i_mbb_xy = h->mb.i_mb_top_xy;
-
-        /* no need to test skip/pcm */
-        if( i_mba_xy >= 0 )
-            i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
-        if( i_mbb_xy >= 0 )
-            i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
+        case DCT_LUMA_DC:
+            if( h->mb.i_neighbour & MB_LEFT )
+            {
+                i_mba_xy = h->mb.i_mb_xy - 1;
+                i_nza = h->mb.cbp[i_mba_xy] & 0x100;
+            }
+            if( h->mb.i_neighbour & MB_TOP )
+            {
+                i_mbb_xy = h->mb.i_mb_top_xy;
+                i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
+            }
+            break;
+        case DCT_LUMA_AC:
+        case DCT_LUMA_4x4:
+            if( i_idx & ~10 ) // block_idx_x > 0
+                i_mba_xy = h->mb.i_mb_xy;
+            else if( h->mb.i_neighbour & MB_LEFT )
+                i_mba_xy = h->mb.i_mb_xy - 1;
+
+            if( i_idx & ~5 ) // block_idx_y > 0
+                i_mbb_xy = h->mb.i_mb_xy;
+            else if( h->mb.i_neighbour & MB_TOP )
+                i_mbb_xy = h->mb.i_mb_top_xy;
+
+            /* no need to test for skip/pcm */
+            if( i_mba_xy >= 0 )
+                i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
+            if( i_mbb_xy >= 0 )
+                i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
+            break;
+        case DCT_CHROMA_DC:
+            /* no need to test skip/pcm */
+            if( h->mb.i_neighbour & MB_LEFT )
+            {
+                i_mba_xy = h->mb.i_mb_xy - 1;
+                i_nza = h->mb.cbp[i_mba_xy] & (0x200 << i_idx);
+            }
+            if( h->mb.i_neighbour & MB_TOP )
+            {
+                i_mbb_xy = h->mb.i_mb_top_xy;
+                i_nzb = h->mb.cbp[i_mbb_xy] & (0x200 << i_idx);
+            }
+            break;
+        case DCT_CHROMA_AC:
+            if( i_idx & 1 )
+                i_mba_xy = h->mb.i_mb_xy;
+            else if( h->mb.i_neighbour & MB_LEFT )
+                i_mba_xy = h->mb.i_mb_xy - 1;
+
+            if( i_idx & 2 )
+                i_mbb_xy = h->mb.i_mb_xy;
+            else if( h->mb.i_neighbour & MB_TOP )
+                i_mbb_xy = h->mb.i_mb_top_xy;
+
+            /* no need to test skip/pcm */
+            if( i_mba_xy >= 0 )
+                i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
+            if( i_mbb_xy >= 0 )
+                i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
     }
 
     if( IS_INTRA( h->mb.i_type ) )
@@ -659,8 +662,6 @@ static const uint8_t last_coeff_flag_offset_8x8[63] = {
     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
 };
-static const uint8_t identity[16] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
 
 // node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
 //           4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
@@ -680,6 +681,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
     const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
     const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
     const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat];
+    const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
 
     int i_coeff_abs_m1[64];
     int UNUSED i_coeff_sign[64];
@@ -689,9 +691,6 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
     int node_ctx = 0;
     int i, j;
 
-    const uint8_t *significant_coeff_flag_offset;
-    const uint8_t *last_coeff_flag_offset;
-
     /* yes this is always aligned, and l[-1] exists in the cases where it's used (ac) */
     for( j = i_count - 4; j >= -1; j -= 4 )
         if( *(uint64_t*)(l+j) )
@@ -710,38 +709,37 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
         }
     }
 
-    significant_coeff_flag_offset = (i_ctxBlockCat == DCT_LUMA_8x8)
-                                  ? significant_coeff_flag_offset_8x8[h->mb.b_interlaced]
-                                  : identity;
-    last_coeff_flag_offset = (i_ctxBlockCat == DCT_LUMA_8x8)
-                           ? last_coeff_flag_offset_8x8 : identity;
-
     for( i = j; i < j+4; i++)
         if( l[i] )
             i_last = i;
 
     i_sigmap_size = X264_MIN( i_last+1, i_count-1 );
 
-    for( i = 0; i < i_sigmap_size; i++ )
-    {
-        if( l[i] )
-        {
-            i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;
-#ifndef RDO_SKIP_BS
-            i_coeff_sign[i_coeff]   = l[i] < 0;
-#endif
-            i_coeff++;
-            x264_cabac_encode_decision( cb, i_ctx_sig + significant_coeff_flag_offset[i], 1 );
-            x264_cabac_encode_decision( cb, i_ctx_last + last_coeff_flag_offset[i], i == i_last );
-        }
-        else
-            x264_cabac_encode_decision( cb, i_ctx_sig + significant_coeff_flag_offset[i], 0 );
+#define WRITE_SIGMAP( l8x8 )\
+    for( i = 0; i < i_sigmap_size; i++ )\
+    {\
+        if( l[i] )\
+        {\
+            i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
+            if( !RDO_SKIP_BS )\
+                i_coeff_sign[i_coeff]   = l[i] < 0;\
+            i_coeff++;\
+            x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 1 );\
+            x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), i == i_last );\
+        }\
+        else\
+            x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 0 );\
     }
 
+    if( i_ctxBlockCat == DCT_LUMA_8x8 )
+        WRITE_SIGMAP( 1 )
+    else
+        WRITE_SIGMAP( 0 )
+
     if( i == i_last )
     {
         i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
         i_coeff_sign[i_coeff]   = l[i] < 0;
 #endif
         i_coeff++;
@@ -760,7 +758,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
         {
             x264_cabac_encode_decision( cb, ctx, 1 );
             ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
-#ifdef RDO_SKIP_BS
+#if RDO_SKIP_BS
             cb->f8_bits_encoded += cabac_size_unary[i_prefix][cb->state[ctx]];
             cb->state[ctx] = cabac_transition_unary[i_prefix][cb->state[ctx]];
 #else
@@ -778,12 +776,12 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
         {
             x264_cabac_encode_decision( cb, ctx, 0 );
             node_ctx = coeff_abs_level_transition[0][node_ctx];
-#ifdef RDO_SKIP_BS
+#if RDO_SKIP_BS
             x264_cabac_encode_bypass( cb, 0 ); // sign
 #endif
         }
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
         x264_cabac_encode_bypass( cb, i_coeff_sign[i_coeff] );
 #endif
     } while( i_coeff > 0 );
@@ -797,7 +795,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
     int i_list;
     int i;
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     const int i_mb_pos_start = x264_cabac_pos( cb );
     int       i_mb_pos_tex;
 #endif
@@ -805,7 +803,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
     /* Write the MB type */
     x264_cabac_mb_type( h, cb );
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     if( i_mb_type == I_PCM )
     {
         i_mb_pos_tex = x264_cabac_pos( cb );
@@ -980,7 +978,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
         }
     }
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     i_mb_pos_tex = x264_cabac_pos( cb );
     h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
 #endif
@@ -1036,12 +1034,12 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
         }
     }
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
 #endif
 }
 
-#ifdef RDO_SKIP_BS
+#if RDO_SKIP_BS
 /*****************************************************************************
  * RD only; doesn't generate a valid bitstream
  * doesn't write cbp or chroma dc (I don't know how much this matters)
index 1d1f3564b5fb0a6c7f57e6894803aab3e68dec8d..483df9b181b5a81a259e5b4ebda8506a1c50441c 100644 (file)
 #include "common/common.h"
 #include "macroblock.h"
 
+#ifndef RDO_SKIP_BS
+#define RDO_SKIP_BS 0
+#endif
+
 static const uint8_t intra4x4_cbp_to_golomb[48]=
 {
   3, 29, 30, 17, 31, 18, 37,  8, 32, 38, 19,  9, 20, 10, 11,  2,
@@ -168,7 +172,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
                 }
                 else
                 {
-#ifdef RDO_SKIP_BS
+#if RDO_SKIP_BS
                     /* Weight highly against overflows. */
                     s->i_bits_encoded += 1000000;
 #else
@@ -212,7 +216,7 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
     if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
         && !array_non_zero(h->dct.luma16x16_dc) )
     {
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
         h->mb.i_qp = h->mb.i_last_qp;
 #endif
         i_dqp = 0;
@@ -300,7 +304,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
     int i_mb_i_offset;
     int i;
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     const int i_mb_pos_start = bs_pos( s );
     int       i_mb_pos_tex;
 #endif
@@ -327,7 +331,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         bs_write1( s, h->mb.b_interlaced );
     }
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     if( i_mb_type == I_PCM)
     {
         bs_write_ue( s, i_mb_i_offset + 25 );
@@ -579,7 +583,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         return;
     }
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     i_mb_pos_tex = bs_pos( s );
     h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
 #endif
@@ -628,12 +632,12 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
             }
     }
 
-#ifndef RDO_SKIP_BS
+#if !RDO_SKIP_BS
     h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
 #endif
 }
 
-#ifdef RDO_SKIP_BS
+#if RDO_SKIP_BS
 /*****************************************************************************
  * RD only; doesn't generate a valid bitstream
  * doesn't write cbp or chroma dc (I don't know how much this matters)
index b2c9f8e8781baee5c9ad49b366e687d2a548fe6c..655a3296a587c46fc6e8753b90e1686268f67f99 100644 (file)
@@ -22,7 +22,7 @@
  * instead of writing the bitstream.
  * TODO: use these for fast 1st pass too. */
 
-#define RDO_SKIP_BS
+#define RDO_SKIP_BS 1
 
 /* Transition and size tables for abs<9 MVD and residual coding */
 /* Consist of i_prefix-2 1s, one zero, and a bypass sign bit */