int i_intra16x16_pred_mode;
int i_chroma_pred_mode;
+ /* skip flags for i4x4 and i8x8
+ * 0 = encode as normal.
+ * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
+ * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
+ int i_skip_intra;
+
struct
{
/* space for p_fenc and p_fdec */
DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 );
DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 );
+ /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
+ DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 );
+ DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 );
+ DECLARE_ALIGNED( int, i8x8_dct_buf[3][64], 16 );
+ DECLARE_ALIGNED( int, i4x4_dct_buf[15][16], 16 );
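+ /* Note: only 3 of the 4 i8x8 DCTs (15 of the 16 i4x4 DCTs) are saved,
+ * since the last block is re-encoded on the skip path anyway and its
+ * coefficients are regenerated there. */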
+
/* pointers to the current macroblock in the frame to be compressed */
uint8_t *p_fenc[3];
a->i_satd_i8x8chroma = COST_MAX;
a->b_fast_intra = 0;
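+ /* Pick the skip mode for this pass: lossless never skips, RD needs mode 2
+ * since its own encodes overwrite h->dct, and trellis or noise reduction
+ * would change the coefficients after analysis, ruling out the skip. */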
+ h->mb.i_skip_intra =
+ h->mb.b_lossless ? 0 :
+ a->b_mbrd ? 2 :
+ !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;
/* II: Inter part P/B frame */
if( h->sh.i_type != SLICE_TYPE_I )
}
if( idx == 3 )
+ {
a->i_satd_i8x8 = i_cost;
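+ /* All four i8x8 blocks are now reconstructed in fdec; save them (and, in
+ * RD mode, the DCT coefficients) so the encode stage can skip this work. */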
+ if( h->mb.i_skip_intra )
+ {
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
+ }
+ }
else
{
a->i_satd_i8x8 = COST_MAX;
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
}
if( idx == 15 )
+ {
a->i_satd_i4x4 = i_cost;
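+ /* Same caching as the i8x8 case above, for the 16 i4x4 blocks. */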
+ if( h->mb.i_skip_intra )
+ {
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.block, sizeof(h->mb.pic.i4x4_dct_buf) );
+ }
+ }
else
a->i_satd_i4x4 = COST_MAX;
}
int i_max, i_satd, i_best, i_mode, i_thresh;
int i_pred_mode;
int predict_mode[9];
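+ /* This path doesn't keep the cached reconstruction in sync with its
+ * decisions, so make the encode stage do the full work. */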
+ h->mb.i_skip_intra = 0;
if( h->mb.i_type == I_16x16 )
{
h->mb.b_trellis = h->param.analyse.i_trellis;
h->mb.b_noise_reduction = h->param.analyse.i_noise_reduction;
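+ /* trellis == 1 applies trellis only to the final encode, so a DCT cached
+ * during analysis would no longer match it; per-MB noise reduction likewise.
+ * (trellis == 2 was already used during analysis, so the cache stays valid.) */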
+ if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
+ h->mb.i_skip_intra = 0;
}
/*-------------------- Update MB from the analysis ----------------------*/
{
DECLARE_ALIGNED( uint8_t, edge[33], 16 );
h->mb.b_transform_8x8 = 1;
- for( i = 0; i < 4; i++ )
+ /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
+ if( h->mb.i_skip_intra )
+ {
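+ /* mc.copy takes (dst, dst_stride, src, src_stride, height): restore the
+ * cached 16x16 luma reconstruction into fdec. */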
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
+ /* In RD mode, restore the now-overwritten DCT data. */
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
+ }
+ for( i = h->mb.i_skip_intra ? 3 : 0; i < 4; i++ )
{
uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
else if( h->mb.i_type == I_4x4 )
{
h->mb.b_transform_8x8 = 0;
- for( i = 0; i < 16; i++ )
+ /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
+ if( h->mb.i_skip_intra )
+ {
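+ /* Mirror of the i8x8 restore above: bring back the cached fdec and, in
+ * RD mode, the saved i4x4 DCTs. */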
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
+ /* In RD mode, restore the now-overwritten DCT data. */
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->dct.block, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
+ }
+ for( i = h->mb.i_skip_intra ? 15 : 0; i < 16; i++ )
{
uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];