Add row-reencoding support to VBV for improved accuracy

author Fiona Glaser <fiona@x264.com>

Thu, 9 Feb 2012 22:23:52 +0000 (14:23 -0800)

committer Fiona Glaser <fiona@x264.com>

Wed, 7 Mar 2012 01:13:15 +0000 (17:13 -0800)
author Fiona Glaser <fiona@x264.com>
Thu, 9 Feb 2012 22:23:52 +0000 (14:23 -0800)
committer Fiona Glaser <fiona@x264.com>
Wed, 7 Mar 2012 01:13:15 +0000 (17:13 -0800)
diff --git a/common/macroblock.c b/common/macroblock.c

index b4e6d951b0d1fa31a9aeaa1c48f36ee00316d921..6bb0566e80a905767abb24a4ab8b4f06e1263187 100644 (file)
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -360,13 +360,11 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  {
      if( !b_lookahead )
      {
-        for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
+        for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
              for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
              {
                  CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
                  h->intra_border_backup[i][j] += 16;
-                if( !PARAM_INTERLACED )
-                    h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
              }
          for( int i = 0; i <= PARAM_INTERLACED; i++ )
          {
@@ -404,7 +402,7 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
      {
          for( int i = 0; i <= PARAM_INTERLACED; i++ )
              x264_free( h->deblock_strength[i] );
-        for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
+        for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
              for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
                  x264_free( h->intra_border_backup[i][j] - 16 );
      }
@@ -563,7 +561,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
                       ? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
                       : 16 * mb_x + height * mb_y * i_stride;
      pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
-    int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
+    int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : !(mb_y&1);
      pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
      int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
      /* ref_pix_offset[0] references the current field and [1] the opposite field. */
@@ -576,20 +574,16 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
          h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height );
          memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
          memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
-        if( b_mbaff )
-        {
-            h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
-            h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
-        }
+        h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
+        h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
      }
      else
      {
          h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 );
          memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
-        if( b_mbaff )
-            h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
+        h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
      }
-    if( b_mbaff )
+    if( b_mbaff || h->mb.b_reencode_mb )
      {
          for( int j = 0; j < height; j++ )
              if( b_chroma )
@@ -1638,7 +1632,7 @@ static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int
       * For progressive mbs this is the bottom two rows, and for interlaced the
       * bottom row of each field. We also store samples needed for the next
       * mbpair in intra_border_backup[2]. */
-    int backup_dst = !b_mbaff ? 0 : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
+    int backup_dst = !b_mbaff ? (mb_y&1) : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
      memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16  ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
      if( CHROMA444 )
      {
@@ -1672,14 +1666,6 @@ static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int
              }
          }
      }
-    else
-    {
-        /* In progressive we update intra_border_backup in-place, so the topleft neighbor will
-         * no longer exist there when load_pic_pointers wants it. Move it within p_fdec instead. */
-        h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[0][-FDEC_STRIDE+15];
-        h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)];
-        h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)];
-    }
  }
  
  void x264_macroblock_cache_save( x264_t *h )
diff --git a/encoder/encoder.c b/encoder/encoder.c

index b420544a9b98c0c0a9fc8cedef793ff2585fe372..93842ef98d069fc5dc57578ebc4ccccdb5b5561e 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -334,17 +334,17 @@ static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal
  }
  
  /* If we are within a reasonable distance of the end of the memory allocated for the bitstream, */
-/* reallocate, adding an arbitrary amount of space (100 kilobytes). */
+/* reallocate, adding an arbitrary amount of space. */
  static int x264_bitstream_check_buffer( x264_t *h )
  {
      uint8_t *bs_bak = h->out.p_bitstream;
-    int max_mb_size = 2500 << SLICE_MBAFF;
-    if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_mb_size)) ||
-        (h->out.bs.p_end - h->out.bs.p < max_mb_size) )
+    int max_row_size = (2500 << SLICE_MBAFF) * h->mb.i_mb_width;
+    if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_row_size)) ||
+        (h->out.bs.p_end - h->out.bs.p < max_row_size) )
      {
-        h->out.i_bitstream += 100000;
+        h->out.i_bitstream += max_row_size;
          CHECKED_MALLOC( h->out.p_bitstream, h->out.i_bitstream );
-        h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - 100000) & ~15 );
+        h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - max_row_size) & ~15 );
          intptr_t delta = h->out.p_bitstream - bs_bak;
  
          h->out.bs.p_start += delta;
@@ -580,7 +580,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
      }
      h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
      h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
-    h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 0, QP_MAX );
+    h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 2, QP_MAX );
      h->param.rc.i_bitrate = x264_clip3( h->param.rc.i_bitrate, 0, 2000000 );
      h->param.rc.i_vbv_buffer_size = x264_clip3( h->param.rc.i_vbv_buffer_size, 0, 2000000 );
      h->param.rc.i_vbv_max_bitrate = x264_clip3( h->param.rc.i_vbv_max_bitrate, 0, 2000000 );
@@ -2059,12 +2059,20 @@ typedef struct
      bs_t bs;
      x264_cabac_t cabac;
      x264_frame_stat_t stat;
+    int last_qp;
+    int last_dqp;
+    int field_decoding_flag;
  } x264_bs_bak_t;
  
  static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full )
  {
      if( full )
+    {
          bak->stat = h->stat.frame;
+        bak->last_qp = h->mb.i_last_qp;
+        bak->last_dqp = h->mb.i_last_dqp;
+        bak->field_decoding_flag = h->mb.field_decoding_flag;
+    }
      else
      {
          bak->stat.i_mv_bits = h->stat.frame.i_mv_bits;
@@ -2093,7 +2101,12 @@ static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak,
  static ALWAYS_INLINE void x264_bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full )
  {
      if( full )
+    {
          h->stat.frame = bak->stat;
+        h->mb.i_last_qp = bak->last_qp;
+        h->mb.i_last_dqp = bak->last_dqp;
+        h->mb.field_decoding_flag = bak->field_decoding_flag;
+    }
      else
      {
          h->stat.frame.i_mv_bits = bak->stat.i_mv_bits;
@@ -2128,8 +2141,9 @@ static int x264_slice_write( x264_t *h )
      int starting_bits = bs_pos(&h->out.bs);
      int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
      int b_hpel = h->fdec->b_kept_as_ref;
+    int orig_last_mb = h->sh.i_last_mb;
      uint8_t *last_emu_check;
-    x264_bs_bak_t bs_bak[1];
+    x264_bs_bak_t bs_bak[2];
      b_deblock &= b_hpel || h->param.psz_dump_yuv;
      bs_realign( &h->out.bs );
  
@@ -2175,17 +2189,18 @@ static int x264_slice_write( x264_t *h )
          mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
          int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
  
-        if( !(i_mb_y & SLICE_MBAFF) )
+        if( i_mb_x == 0 )
          {
              if( x264_bitstream_check_buffer( h ) )
                  return -1;
-
-            if( back_up_bitstream )
-                x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 );
+            if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )
+                x264_bitstream_backup( h, &bs_bak[1], i_skip, 1 );
+            if( !h->mb.b_reencode_mb )
+                x264_fdec_filter_row( h, i_mb_y, 1 );
          }
  
-        if( i_mb_x == 0 && !h->mb.b_reencode_mb )
-            x264_fdec_filter_row( h, i_mb_y, 1 );
+        if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
+            x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 );
  
          if( PARAM_INTERLACED )
          {
@@ -2290,14 +2305,10 @@ reencode:
                      break;
                  }
                  else
-                {
                      h->sh.i_last_mb = mb_xy;
-                    h->mb.b_reencode_mb = 0;
-                }
              }
-            else
-                h->mb.b_reencode_mb = 0;
          }
+        h->mb.b_reencode_mb = 0;
  
  #if HAVE_VISUALIZE
          if( h->param.b_visualize )
@@ -2307,6 +2318,17 @@ reencode:
          /* save cache */
          x264_macroblock_cache_save( h );
  
+        if( x264_ratecontrol_mb( h, mb_size ) < 0 )
+        {
+            x264_bitstream_restore( h, &bs_bak[1], &i_skip, 1 );
+            h->mb.b_reencode_mb = 1;
+            i_mb_x = 0;
+            i_mb_y = i_mb_y - SLICE_MBAFF;
+            h->mb.i_mb_prev_xy = i_mb_y * h->mb.i_mb_stride - 1;
+            h->sh.i_last_mb = orig_last_mb;
+            continue;
+        }
+
          /* accumulate mb stats */
          h->stat.frame.i_mb_count[h->mb.i_type]++;
  
@@ -2381,8 +2403,6 @@ reencode:
          if( b_deblock )
              x264_macroblock_deblock_strength( h );
  
-        x264_ratecontrol_mb( h, mb_size );
-
          if( mb_xy == h->sh.i_last_mb )
              break;
  
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index 218f3f9ce344f852447a67796e34b7331ceb7812..ee2911baca20f2ae032c81601e178634c06c5c74 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -87,7 +87,9 @@ struct x264_ratecontrol_t
      int qp;                     /* qp for current frame */
      float qpm;                  /* qp for current macroblock: precise float for AQ */
      float qpa_rc;               /* average of macroblocks' qp before aq */
+    float qpa_rc_prev;
      int   qpa_aq;               /* average of macroblocks' qp after aq */
+    int   qpa_aq_prev;
      float qp_novbv;             /* QP for the current frame if 1-pass VBV was disabled. */
  
      /* VBV stuff */
@@ -1335,8 +1337,8 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
  
      q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  
-    rc->qpa_rc =
-    rc->qpa_aq = 0;
+    rc->qpa_rc = rc->qpa_rc_prev =
+    rc->qpa_aq = rc->qpa_aq_prev = 0;
      rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX );
      h->fdec->f_qp_avg_rc =
      h->fdec->f_qp_avg_aq =
@@ -1400,7 +1402,7 @@ static float predict_row_size_sum( x264_t *h, int y, float qp )
   *  eliminate all use of qp in row ratecontrol: make it entirely qscale-based.
   *  make this function stop being needlessly O(N^2)
   *  update more often than once per row? */
-void x264_ratecontrol_mb( x264_t *h, int bits )
+int x264_ratecontrol_mb( x264_t *h, int bits )
  {
      x264_ratecontrol_t *rc = h->rc;
      const int y = h->mb.i_mb_y;
@@ -1409,13 +1411,13 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
      rc->qpa_aq += h->mb.i_qp;
  
      if( h->mb.i_mb_x != h->mb.i_mb_width - 1 )
-        return;
+        return 0;
  
      x264_emms();
      rc->qpa_rc += rc->qpm * h->mb.i_mb_width;
  
      if( !rc->b_vbv )
-        return;
+        return 0;
  
      float qscale = qp2qscale( rc->qpm );
      h->fdec->f_row_qp[y] = rc->qpm;
@@ -1427,19 +1429,38 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  
      /* update ratecontrol per-mbpair in MBAFF */
      if( SLICE_MBAFF && !(y&1) )
-        return;
+        return 0;
+
+    /* FIXME: We don't currently support re-encoding when a slice boundary
+     * falls inside the row (or MBAFF row pair) that would be re-encoded. */
+    int can_reencode_row = h->sh.i_first_mb <= ((h->mb.i_mb_y - SLICE_MBAFF) * h->mb.i_mb_stride);
  
      /* tweak quality based on difference from predicted size */
+    float prev_row_qp = h->fdec->f_row_qp[y];
+    float qp_absolute_max = h->param.rc.i_qp_max;
+    if( rc->rate_factor_max_increment )
+        qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
+    float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
+    float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
+    float step_size = 0.5f;
+    float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
+    float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
+    float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
+    float size_of_other_slices = 0;
+    if( h->param.b_sliced_threads )
+    {
+        float size_of_other_slices_planned = 0;
+        for( int i = 0; i < h->param.i_threads; i++ )
+            if( h != h->thread[i] )
+            {
+                size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
+                size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
+            }
+        float weight = rc->slice_size_planned / rc->frame_size_planned;
+        size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
+    }
      if( y < h->i_threadslice_end-1 )
      {
-        float prev_row_qp = h->fdec->f_row_qp[y];
-        float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
-        float qp_absolute_max = h->param.rc.i_qp_max;
-        if( rc->rate_factor_max_increment )
-            qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
-        float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
-        float step_size = 0.5f;
-
          /* B-frames shouldn't use lower QP than their reference frames. */
          if( h->sh.i_type == SLICE_TYPE_B )
          {
@@ -1447,31 +1468,14 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
              rc->qpm = X264_MAX( rc->qpm, qp_min );
          }
  
-        float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
-        float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
-        float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
-        float size_of_other_slices = 0;
-        if( h->param.b_sliced_threads )
-        {
-            float size_of_other_slices_planned = 0;
-            for( int i = 0; i < h->param.i_threads; i++ )
-                if( h != h->thread[i] )
-                {
-                    size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
-                    size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
-                }
-            float weight = rc->slice_size_planned / rc->frame_size_planned;
-            size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
-        }
-
          /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
          float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
          float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
  
-        /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
+        /* Don't increase the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
          /* area at the top of the frame was measured inaccurately. */
          if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned )
-            return;
+            qp_max = qp_absolute_max = prev_row_qp;
  
          if( h->sh.i_type != SLICE_TYPE_I )
              rc_tol *= 0.5f;
@@ -1507,9 +1511,39 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
          }
  
          h->rc->frame_size_estimated = b1 - size_of_other_slices;
+
+        /* If the current row was large enough to cause a large QP jump, try re-encoding it. */
+        if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row )
+        {
+            /* Bump QP to halfway between the previous row's QP and the new estimate... close enough. */
+            rc->qpm = x264_clip3f( (prev_row_qp + rc->qpm)*0.5f, prev_row_qp + 1.0f, qp_max );
+            rc->qpa_rc = rc->qpa_rc_prev;
+            rc->qpa_aq = rc->qpa_aq_prev;
+            h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
+            return -1;
+        }
      }
      else
+    {
          h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
+
+        /* Last-ditch attempt: if the last row of the frame underflowed the VBV,
+         * try again. */
+        if( (h->rc->frame_size_estimated + size_of_other_slices) > (rc->buffer_fill - rc->buffer_rate * max_frame_error) &&
+             rc->qpm < qp_max && can_reencode_row )
+        {
+            rc->qpm = qp_max;
+            rc->qpa_rc = rc->qpa_rc_prev;
+            rc->qpa_aq = rc->qpa_aq_prev;
+            h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
+            return -1;
+        }
+    }
+
+    rc->qpa_rc_prev = rc->qpa_rc;
+    rc->qpa_aq_prev = rc->qpa_aq;
+
+    return 0;
  }
  
  int x264_ratecontrol_qp( x264_t *h )
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h

index a10bf3e8acfc67017ccb0d87c8115bf53406b42c..19d4197f6ab1bb941a065a828645b8ebf95a4031 100644 (file)
--- a/encoder/ratecontrol.h
+++ b/encoder/ratecontrol.h
@@ -51,7 +51,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
  void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead );
  int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
  void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm );
-void x264_ratecontrol_mb( x264_t *, int bits );
+int  x264_ratecontrol_mb( x264_t *, int bits );
  int  x264_ratecontrol_qp( x264_t * );
  int  x264_ratecontrol_mb_qp( x264_t *h );
  int  x264_ratecontrol_end( x264_t *, int bits, int *filler );
author	Fiona Glaser <fiona@x264.com>
	Thu, 9 Feb 2012 22:23:52 +0000 (14:23 -0800)
committer	Fiona Glaser <fiona@x264.com>
	Wed, 7 Mar 2012 01:13:15 +0000 (17:13 -0800)
common/macroblock.c		patch \| blob \| history
encoder/encoder.c		patch \| blob \| history
encoder/ratecontrol.c		patch \| blob \| history
encoder/ratecontrol.h		patch \| blob \| history