Move adaptive quantization to before ratecontrol, eliminate qcomp bias

author Fiona Glaser <fiona@x264.com>

Sat, 13 Sep 2008 21:03:12 +0000 (14:03 -0700)

committer Fiona Glaser <fiona@x264.com>

Sun, 14 Sep 2008 00:27:03 +0000 (17:27 -0700)
author Fiona Glaser <fiona@x264.com>
Sat, 13 Sep 2008 21:03:12 +0000 (14:03 -0700)
committer Fiona Glaser <fiona@x264.com>
Sun, 14 Sep 2008 00:27:03 +0000 (17:27 -0700)
diff --git a/common/common.c b/common/common.c

index 96b6f2bdef9942a8e7da596d579f3a3ff3d0b06d..c25dea7c00dc2b734f4ca45c3e066102aa48bd9f 100644 (file)
--- a/common/common.c
+++ b/common/common.c
@@ -93,7 +93,7 @@ void    x264_param_default( x264_param_t *param )
      param->rc.i_qp_step = 4;
      param->rc.f_ip_factor = 1.4;
      param->rc.f_pb_factor = 1.3;
-    param->rc.i_aq_mode = X264_AQ_GLOBAL;
+    param->rc.i_aq_mode = X264_AQ_VARIANCE;
      param->rc.f_aq_strength = 1.0;
  
      param->rc.b_stat_write = 0;
diff --git a/common/frame.c b/common/frame.c

index bba0da9b92d41a7e3367809e8b5026cfea46c892..c824a76d4b5ef78e063668dd92b3c196c0720bfe 100644 (file)
--- a/common/frame.c
+++ b/common/frame.c
@@ -114,6 +114,9 @@ x264_frame_t *x264_frame_new( x264_t *h )
          for( j = 0; j < h->param.i_bframe + 2; j++ )
              CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
  
+    if( h->param.rc.i_aq_mode )
+        CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+
      x264_pthread_mutex_init( &frame->mutex, NULL );
      x264_pthread_cond_init( &frame->cv, NULL );
  
diff --git a/common/frame.h b/common/frame.h

index 523689fcb0867bce8af6c45138fda70395f28299..6e96da656e1914ff70dc35847289bf84cc10e3c8 100644 (file)
--- a/common/frame.h
+++ b/common/frame.h
@@ -71,12 +71,14 @@ typedef struct
       * contains the SATD cost of the lowres frame encoded in various modes
       * FIXME: how big an array do we need? */
      int     i_cost_est[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
+    int     i_cost_est_aq[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
      int     i_satd; // the i_cost_est of the selected frametype
      int     i_intra_mbs[X264_BFRAME_MAX+2];
      int     *i_row_satds[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
      int     *i_row_satd;
      int     *i_row_bits;
      int     *i_row_qp;
+    float   *f_qp_offset;
  
      /* threading */
      int     i_lines_completed; /* in pixels */
diff --git a/encoder/encoder.c b/encoder/encoder.c

index 92bc699cdf6a6be46b48c963976d5c4c960e2b56..4141d0959995361044da5660f205b1084341b73b 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -487,12 +487,9 @@ static int x264_validate_parameters( x264_t *h )
      if( !h->param.b_cabac )
          h->param.analyse.i_trellis = 0;
      h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
-    h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
+    h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 1 );
      if( h->param.rc.f_aq_strength <= 0 )
          h->param.rc.i_aq_mode = 0;
-    /* VAQ effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
-    if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
-        h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.rc.f_aq_strength / 0.7, 0, 1);
      h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  
      {
@@ -1362,6 +1359,9 @@ int     x264_encoder_encode( x264_t *h,
          if( h->frames.b_have_lowres )
              x264_frame_init_lowres( h, fenc );
  
+        if( h->param.rc.i_aq_mode )
+            x264_adaptive_quant_frame( h, fenc );
+
          if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
          {
              /* Nothing yet to encode */
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index 4c425e358d3a805bdd868126edded11dd8866d72..555404f9127aaf8d40ceadd5fff71875493818d3 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -127,10 +127,6 @@ struct x264_ratecontrol_t
      int bframes;                /* # consecutive B-frames before this P-frame */
      int bframe_bits;            /* total cost of those frames */
  
-    /* AQ stuff */
-    float aq_threshold;
-    int *ac_energy;
-
      int i_zones;
      x264_zone_t *zones;
      x264_zone_t *prev_zone;
@@ -172,64 +168,40 @@ static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
  }
  
  // Find the total AC energy of the block in all planes.
-static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
+static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
  {
      /* This function contains annoying hacks because GCC has a habit of reordering emms
       * and putting it after floating point ops.  As a result, we put the emms at the end of the
       * function and make sure that its always called before the float math.  Noinline makes
       * sure no reordering goes on. */
-    /* FIXME: This array is larger than necessary because a bug in GCC causes an all-zero
-    * array to be placed in .bss despite .bss not being correctly aligned on some platforms (win32?) */
-    DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
      unsigned int var=0, sad, i;
-    if( satd || h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
+    for( i=0; i<3; i++ )
      {
-        for( i=0; i<3; i++ )
-        {
-            int w = i ? 8 : 16;
-            int stride = h->fenc->i_stride[i];
-            int offset = h->mb.b_interlaced
-                ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
-                : w * (mb_x + mb_y * stride);
-            int pix = i ? PIXEL_8x8 : PIXEL_16x16;
-            stride <<= h->mb.b_interlaced;
-            var += h->pixf.var[pix]( h->fenc->plane[i]+offset, stride, &sad );
-            // SATD to represent the block's overall complexity (bit cost) for intra encoding.
-            // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
-            if( var && satd )
-                *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - sad/2;
-        }
-        var = X264_MAX(var,1);
+        int w = i ? 8 : 16;
+        int stride = frame->i_stride[i];
+        int offset = h->mb.b_interlaced
+            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+            : w * (mb_x + mb_y * stride);
+        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+        stride <<= h->mb.b_interlaced;
+        var += h->pixf.var[pix]( frame->plane[i]+offset, stride, &sad );
      }
-    else var = h->rc->ac_energy[h->mb.i_mb_xy];
+    var = X264_MAX(var,1);
      x264_emms();
      return var;
  }
  
-static void x264_autosense_aq( x264_t *h )
+void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  {
-    double total = 0;
-    double n = 0;
      int mb_x, mb_y;
-    // FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?). Can we reuse them?
-    // FIXME: Is chroma SATD necessary?
      for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
          for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
          {
-            int satd=0;
-            int energy = ac_energy_mb( h, mb_x, mb_y, &satd );
-            h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
-            /* Weight the energy value by the SATD value of the MB.
-             * This represents the fact that the more complex blocks in a frame should
-             * be weighted more when calculating the optimal threshold. This also helps
-             * diminish the negative effect of large numbers of simple blocks in a frame,
-             * such as in the case of a letterboxed film. */
-            total += logf(energy) * satd;
-            n += satd;
+            int energy = ac_energy_mb( h, mb_x, mb_y, frame );
+            /* The constant 10.0 was chosen so that overall bitrate stays approximately the same as without AQ. */
+            float qp_adj = h->param.rc.f_aq_strength * 1.5 * (logf(energy) - 10.0);
+            frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
          }
-    x264_emms();
-    /* Calculate and store the threshold. */
-    h->rc->aq_threshold = n ? total/n : 15;
  }
  
  /*****************************************************************************
@@ -241,13 +213,11 @@ static void x264_autosense_aq( x264_t *h )
  *****************************************************************************/
  void x264_adaptive_quant( x264_t *h )
  {
-    int energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
-    /* Adjust the QP based on the AC energy of the macroblock. */
-    float qp = h->rc->f_qpm;
-    float qp_adj = 1.5 * (logf(energy) - h->rc->aq_threshold);
-    if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
-        qp_adj = x264_clip3f( qp_adj, -5, 5 );
-    h->mb.i_qp = x264_clip3( qp + qp_adj * h->param.rc.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+    float qp, qp_adj;
+    x264_emms();
+    qp = h->rc->f_qpm;
+    qp_adj = h->fenc->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride];
+    h->mb.i_qp = x264_clip3( qp + qp_adj + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
      /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
       * to lower the bit cost of the qp_delta. */
      if( abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
@@ -554,8 +524,6 @@ int x264_ratecontrol_new( x264_t *h )
          h->thread[i]->rc = rc+i;
          if( i )
              rc[i] = rc[0];
-        if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
-            rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
      }
  
      return 0;
@@ -717,8 +685,6 @@ void x264_ratecontrol_delete( x264_t *h )
                      x264_free( rc->zones[i].param );
          x264_free( rc->zones );
      }
-    for( i=0; i<h->param.i_threads; i++ )
-        x264_free( rc[i].ac_energy );
      x264_free( rc );
  }
  
@@ -842,14 +808,6 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp )
  
      if( h->sh.i_type != SLICE_TYPE_B )
          rc->last_non_b_pict_type = h->sh.i_type;
-
-    /* Adaptive AQ thresholding algorithm. */
-    if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
-        /* Arbitrary value for "center" of the AQ curve.
-         * Chosen so that any given value of CRF has on average similar bitrate with and without AQ. */
-        h->rc->aq_threshold = logf(5000);
-    else if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
-        x264_autosense_aq(h);
  }
  
  static double predict_row_size( x264_t *h, int y, int qp )
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h

index d5e3371a6711048f3695678b4b949c02d4e1482c..3310d3c2a7dc0f8ec681c3c5c823f70d1c100709 100644 (file)
--- a/encoder/ratecontrol.h
+++ b/encoder/ratecontrol.h
@@ -27,6 +27,8 @@
  int  x264_ratecontrol_new   ( x264_t * );
  void x264_ratecontrol_delete( x264_t * );
  
+void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
+void x264_adaptive_quant( x264_t * );
  void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
  void x264_ratecontrol_start( x264_t *, int i_force_qp );
  int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
@@ -34,7 +36,6 @@ void x264_ratecontrol_mb( x264_t *, int bits );
  int  x264_ratecontrol_qp( x264_t * );
  void x264_ratecontrol_end( x264_t *, int bits );
  void x264_ratecontrol_summary( x264_t * );
-void x264_adaptive_quant( x264_t * );
  void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
  int  x264_ratecontrol_get_estimated_size( x264_t const *);
  int  x264_rc_analyse_slice( x264_t *h );
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index e1b42f35a135a89d0cd174f0393875a65787abf7..e4585e843737a5ea8441f0665fe928352be47975 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -248,6 +248,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
                                 int b_intra_penalty )
  {
      int i_score = 0;
+    /* AQ-weighted score, accumulated separately: it is for ratecontrol only, not for the slicetype decision. */
+    int i_score_aq = 0;
  
      /* Check whether we already evaluated this frame
       * If we have tried this frame as P, then we have also tried
@@ -276,9 +278,15 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
          if( p1 != p0 )
              dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
  
+        if( h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+        {
+            for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+                for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+                    i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+        }
          /* the edge mbs seem to reduce the predictive quality of the
           * whole frame's score, but are needed for a spatial distribution. */
-        if( h->param.rc.i_vbv_buffer_size )
+        else if( h->param.rc.i_vbv_buffer_size )
          {
              for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
              {
@@ -286,33 +294,45 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
                  for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
                  {
                      int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
-                    row_satd[ h->mb.i_mb_y ] += i_mb_cost;
+                    int i_mb_cost_aq = i_mb_cost;
+                    if( h->param.rc.i_aq_mode )
+                    {
+                        x264_emms();
+                        i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
+                    }
+                    row_satd[ h->mb.i_mb_y ] += i_mb_cost_aq;
                      if( h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
                          h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 )
                      {
+                        /* Don't use AQ-weighted costs for slicetype decision, only for ratecontrol. */
                          i_score += i_mb_cost;
+                        i_score_aq += i_mb_cost_aq;
                      }
                  }
              }
          }
-        else if( h->sps->i_mb_width > 2 && h->sps->i_mb_height > 2 )
+        else
          {
              for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ )
                  for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ )
-                    i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
-        }
-        else
-        {
-            for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
-                for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
-                    i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+                {
+                    int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+                    int i_mb_cost_aq = i_mb_cost;
+                    if( h->param.rc.i_aq_mode )
+                    {
+                        x264_emms();
+                        i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
+                    }
+                    i_score += i_mb_cost;
+                    i_score_aq += i_mb_cost_aq;
+                }
          }
  
-
          if( b != p1 )
              i_score = i_score * 100 / (120 + h->param.i_bframe_bias);
  
          frames[b]->i_cost_est[b-p0][p1-b] = i_score;
+        frames[b]->i_cost_est_aq[b-p0][p1-b] = i_score_aq;
  //      fprintf( stderr, "frm %d %c(%d,%d): %6d %6d imb:%d  \n", frames[b]->i_frame,
  //               (p1==0?'I':b<p1?'B':'P'), b-p0, p1-b, i_score, frames[b]->i_cost_est[0][0], frames[b]->i_intra_mbs[b-p0] );
          x264_emms();
@@ -538,6 +558,11 @@ int x264_rc_analyse_slice( x264_t *h )
      frames[b] = h->fenc;
  
      cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
+    /* With AQ, ratecontrol uses the AQ-weighted score (i_cost_est_aq), not the unweighted i_cost_est.
+     * Frames <= 2 MBs wide or high are scored without AQ weighting (i_cost_est_aq stays 0), so keep cost there. */
+    if( h->param.rc.i_aq_mode && h->sps->i_mb_width > 2 && h->sps->i_mb_height > 2 )
+        cost = frames[b]->i_cost_est_aq[b-p0][p1-b];
+
      h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
      h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
      h->fdec->i_satd = cost;
diff --git a/x264.c b/x264.c

index f472fd78a727435c7935b4965bfceb051020ce46..e57c297a1c4d43a8f8ea967bf472eeae8481ab50 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -194,10 +194,9 @@ static void Help( x264_param_t *defaults, int b_longhelp )
      H0( "      --ipratio <float>       QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor );
      H0( "      --pbratio <float>       QP factor between P and B [%.2f]\n", defaults->rc.f_pb_factor );
      H1( "      --chroma-qp-offset <integer>  QP difference between chroma and luma [%d]\n", defaults->analyse.i_chroma_qp_offset );
-    H0( "      --aq-mode <integer>     How AQ distributes bits [%d]\n"
+    H1( "      --aq-mode <integer>     AQ method [%d]\n"
          "                                  - 0: Disabled\n"
-        "                                  - 1: Avoid moving bits between frames\n"
-        "                                  - 2: Move bits between frames\n", defaults->rc.i_aq_mode );
+        "                                  - 1: Variance AQ (complexity mask)\n", defaults->rc.i_aq_mode );
      H0( "      --aq-strength <float>   Reduces blocking and blurring in flat and\n"
          "                              textured areas. [%.1f]\n"
          "                                  - 0.5: weak AQ\n"
diff --git a/x264.h b/x264.h

index 538e9a74e9f0dff340f559430076165daaed7408..c16e597b9aaba69d01a0eefce46ccd584f67901c 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@
  
  #include <stdarg.h>
  
-#define X264_BUILD 61
+#define X264_BUILD 62
  
  /* x264_t:
   *      opaque handler for encoder */
@@ -85,8 +85,7 @@ typedef struct x264_t x264_t;
  #define X264_RC_CRF                  1
  #define X264_RC_ABR                  2
  #define X264_AQ_NONE                 0
-#define X264_AQ_LOCAL                1
-#define X264_AQ_GLOBAL               2
+#define X264_AQ_VARIANCE             1
  
  static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
  static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
author	Fiona Glaser <fiona@x264.com>
	Sat, 13 Sep 2008 21:03:12 +0000 (14:03 -0700)
committer	Fiona Glaser <fiona@x264.com>
	Sun, 14 Sep 2008 00:27:03 +0000 (17:27 -0700)
common/common.c		patch \| blob \| history
common/frame.c		patch \| blob \| history
common/frame.h		patch \| blob \| history
encoder/encoder.c		patch \| blob \| history
encoder/ratecontrol.c		patch \| blob \| history
encoder/ratecontrol.h		patch \| blob \| history
encoder/slicetype.c		patch \| blob \| history
x264.c		patch \| blob \| history
x264.h		patch \| blob \| history