Measure CBP cost in i8x8 RD refinement

author Fiona Glaser <fiona@x264.com>

Sat, 31 Jan 2009 09:00:26 +0000 (01:00 -0800)

committer Fiona Glaser <fiona@x264.com>

Tue, 3 Feb 2009 05:21:42 +0000 (21:21 -0800)
author Fiona Glaser <fiona@x264.com>
Sat, 31 Jan 2009 09:00:26 +0000 (01:00 -0800)
committer Fiona Glaser <fiona@x264.com>
Tue, 3 Feb 2009 05:21:42 +0000 (21:21 -0800)
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 63a74ea9c601a0af09e26cdb96d4f3049f5ba28a..b02b945f47fc624c2748ef23fe2f229639f47910 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -95,6 +95,7 @@ typedef struct
      int i_predict16x16;
  
      int i_satd_i8x8;
+    int i_cbp_i8x8_luma;
      int i_satd_i8x8_dir[12][4];
      int i_predict8x8[4];
  
@@ -863,6 +864,7 @@ static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
          h->mb.i_type = I_8x8;
          x264_analyse_update_cache( h, a );
          a->i_satd_i8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
+        a->i_cbp_i8x8_luma = h->mb.i_cbp_luma;
      }
      else
          a->i_satd_i8x8 = COST_MAX;
@@ -896,7 +898,51 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
              COPY2_IF_LT( i_best, i_satd, a->i_predict16x16, i_mode );
          }
      }
-    else if( h->mb.i_type == I_4x4 )
+
+    /* RD selection for chroma prediction */
+    predict_8x8chroma_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
+    if( i_max > 1 )
+    {
+        i_thresh = a->i_satd_i8x8chroma * 5/4;
+
+        for( i = j = 0; i < i_max; i++ )
+            if( a->i_satd_i8x8chroma_dir[i] < i_thresh &&
+                predict_mode[i] != a->i_predict8x8chroma )
+            {
+                predict_mode[j++] = predict_mode[i];
+            }
+        i_max = j;
+
+        if( i_max > 0 )
+        {
+            int i_cbp_chroma_best = h->mb.i_cbp_chroma;
+            int i_chroma_lambda = x264_lambda2_tab[h->mb.i_chroma_qp];
+            /* the previous thing encoded was x264_intra_rd(), so the pixels and
+             * coefs for the current chroma mode are still around, so we only
+             * have to recount the bits. */
+            i_best = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 );
+            for( i = 0; i < i_max; i++ )
+            {
+                i_mode = predict_mode[i];
+                if( h->mb.b_lossless )
+                    x264_predict_lossless_8x8_chroma( h, i_mode );
+                else
+                {
+                    h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
+                    h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
+                }
+                /* if we've already found a mode that needs no residual, then
+                 * probably any mode with a residual will be worse.
+                 * so avoid dct on the remaining modes to improve speed. */
+                i_satd = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 );
+                COPY3_IF_LT( i_best, i_satd, a->i_predict8x8chroma, i_mode, i_cbp_chroma_best, h->mb.i_cbp_chroma );
+            }
+            h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
+            h->mb.i_cbp_chroma = i_cbp_chroma_best;
+        }
+    }
+
+    if( h->mb.i_type == I_4x4 )
      {
          uint32_t pels[4] = {0}; // doesn't need initting, just shuts up a gcc warning
          int i_nnz = 0;
@@ -950,10 +996,11 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
          {
              uint64_t pels_h = 0;
              uint8_t pels_v[7];
-            int i_nnz[3];
+            uint16_t i_nnz[2];
              uint8_t *p_src_by;
              uint8_t *p_dst_by;
              int j;
+            int cbp_luma_new = 0;
              i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8;
  
              i_best = COST_MAX64;
@@ -975,73 +1022,34 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
                      x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
                  else
                      h->predict_8x8[i_mode]( p_dst_by, edge );
+                h->mb.i_cbp_luma = a->i_cbp_i8x8_luma;
                  i_satd = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode );
  
                  if( i_best > i_satd )
                  {
                      a->i_predict8x8[idx] = i_mode;
+                    cbp_luma_new = h->mb.i_cbp_luma;
                      i_best = i_satd;
  
                      pels_h = *(uint64_t*)(p_dst_by+7*FDEC_STRIDE);
                      if( !(idx&1) )
                          for( j=0; j<7; j++ )
                              pels_v[j] = p_dst_by[7+j*FDEC_STRIDE];
-                    for( j=0; j<3; j++ )
-                        i_nnz[j] = h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]];
+                    i_nnz[0] = *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+0]];
+                    i_nnz[1] = *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+2]];
                  }
              }
-
+            a->i_cbp_i8x8_luma = cbp_luma_new;
              *(uint64_t*)(p_dst_by+7*FDEC_STRIDE) = pels_h;
              if( !(idx&1) )
                  for( j=0; j<7; j++ )
                      p_dst_by[7+j*FDEC_STRIDE] = pels_v[j];
-            for( j=0; j<3; j++ )
-                h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]] = i_nnz[j];
+            *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+0]] = i_nnz[0];
+            *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+2]] = i_nnz[1];
  
              x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );
          }
      }
-
-    /* RD selection for chroma prediction */
-    predict_8x8chroma_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
-    if( i_max > 1 )
-    {
-        i_thresh = a->i_satd_i8x8chroma * 5/4;
-
-        for( i = j = 0; i < i_max; i++ )
-            if( a->i_satd_i8x8chroma_dir[i] < i_thresh &&
-                predict_mode[i] != a->i_predict8x8chroma )
-            {
-                predict_mode[j++] = predict_mode[i];
-            }
-        i_max = j;
-
-        if( i_max > 0 )
-        {
-            int i_chroma_lambda = x264_lambda2_tab[h->mb.i_chroma_qp];
-            /* the previous thing encoded was x264_intra_rd(), so the pixels and
-             * coefs for the current chroma mode are still around, so we only
-             * have to recount the bits. */
-            i_best = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 );
-            for( i = 0; i < i_max; i++ )
-            {
-                i_mode = predict_mode[i];
-                if( h->mb.b_lossless )
-                    x264_predict_lossless_8x8_chroma( h, i_mode );
-                else
-                {
-                    h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
-                    h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
-                }
-                /* if we've already found a mode that needs no residual, then
-                 * probably any mode with a residual will be worse.
-                 * so avoid dct on the remaining modes to improve speed. */
-                i_satd = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 );
-                COPY2_IF_LT( i_best, i_satd, a->i_predict8x8chroma, i_mode );
-            }
-            h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
-        }
-    }
  }
  
  #define LOAD_FENC( m, src, xoff, yoff) \
diff --git a/encoder/cabac.c b/encoder/cabac.c

index 2015da5e6fdf13dd029dc776905c77f581995c9d..acdd4319099c616e9c588e55e017c5c95f537767 100644 (file)
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -1144,6 +1144,7 @@ static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8,
      const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 );
      i_mode = x264_mb_pred_mode4x4_fix( i_mode );
      x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
+    x264_cabac_mb_cbp_luma( h, cb );
      if( h->mb.i_cbp_luma & (1 << i8) )
          block_residual_write_cabac_8x8( h, cb, 4*i8, h->dct.luma8x8[i8] );
  }
diff --git a/encoder/cavlc.c b/encoder/cavlc.c

index e499fac598350df3161c24d56a348e9ca243b85d..bfeecc2f4937868e56953ac04dd5f22b4990f739 100644 (file)
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -688,14 +688,9 @@ static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
  
  static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
  {
-    int i4;
      h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
-    h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4], h->dct.luma8x8[i8] );
-    for( i4 = 0; i4 < 4; i4++ )
-    {
-        h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = array_non_zero( h->dct.luma4x4[i4+i8*4] );
-        block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
-    }
+    bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
+    x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 );
      return h->out.bs.i_bits_encoded;
  }
  
diff --git a/encoder/macroblock.c b/encoder/macroblock.c

index 6faa305e7c52cf3c6f510f473d1adce3b448106b..d56f1a686c0f4237f5da5629af423af7d2cc7114 100644 (file)
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -169,10 +169,10 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
      h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
  
      nz = x264_quant_8x8( h, dct8x8, i_qp, 1, idx );
-    h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
      if( nz )
      {
          h->mb.i_cbp_luma |= 1<<idx;
+        h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
          h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
          h->dctf.add8x8_idct8( p_dst, dct8x8 );
          STORE_8x8_NNZ(idx,1);
diff --git a/encoder/rdo.c b/encoder/rdo.c

index 76cfdcaf0cd6aa7239699e9a3636878de84d9a57..a69cb2f0ab8b9dc7fedd20408c1145e0827703c4 100644 (file)
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -245,7 +245,7 @@ uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i4, int i_pixel )
  static uint64_t x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode )
  {
      uint64_t i_ssd, i_bits;
-    h->mb.i_cbp_luma = 0;
+    h->mb.i_cbp_luma &= ~(1<<i8);
      h->mb.b_transform_8x8 = 1;
  
      x264_mb_encode_i8x8( h, i8, h->mb.i_qp );
author	Fiona Glaser <fiona@x264.com>
	Sat, 31 Jan 2009 09:00:26 +0000 (01:00 -0800)
committer	Fiona Glaser <fiona@x264.com>
	Tue, 3 Feb 2009 05:21:42 +0000 (21:21 -0800)
encoder/analyse.c		patch | blob | history
encoder/cabac.c		patch | blob | history
encoder/cavlc.c		patch | blob | history
encoder/macroblock.c		patch | blob | history
encoder/rdo.c		patch | blob | history