This change improves VBV accuracy and improves bit distribution in CRF and 2pass.
Instead of being applied after ratecontrol, AQ becomes part of the complexity measure that ratecontrol uses.
This allows for modularity for changes to AQ; a new AQ algorithm can be introduced simply by introducing a new aq_mode and a corresponding if in adaptive_quant_frame.
This also allows quantizer field smoothing, since quantizers are calculated beforehand rather than during encoding.
Since there is no more reason for it, aq_mode 1 is removed. The new mode 1 is in a sense a merger of the old modes 1 and 2.
WARNING: This change redefines CRF when using AQ, so output bitrate for a given CRF may be significantly different from before this change!
param->rc.i_qp_step = 4;
param->rc.f_ip_factor = 1.4;
param->rc.f_pb_factor = 1.3;
- param->rc.i_aq_mode = X264_AQ_GLOBAL;
+ param->rc.i_aq_mode = X264_AQ_VARIANCE;
param->rc.f_aq_strength = 1.0;
param->rc.b_stat_write = 0;
for( j = 0; j < h->param.i_bframe + 2; j++ )
CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+ if( h->param.rc.i_aq_mode )
+ CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+
x264_pthread_mutex_init( &frame->mutex, NULL );
x264_pthread_cond_init( &frame->cv, NULL );
* contains the SATD cost of the lowres frame encoded in various modes
* FIXME: how big an array do we need? */
int i_cost_est[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
+ int i_cost_est_aq[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int i_satd; // the i_cost_est of the selected frametype
int i_intra_mbs[X264_BFRAME_MAX+2];
int *i_row_satds[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int *i_row_satd;
int *i_row_bits;
int *i_row_qp;
+ float *f_qp_offset;
/* threading */
int i_lines_completed; /* in pixels */
if( !h->param.b_cabac )
h->param.analyse.i_trellis = 0;
h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
- h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
+ h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 1 );
if( h->param.rc.f_aq_strength <= 0 )
h->param.rc.i_aq_mode = 0;
- /* VAQ effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
- if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
- h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.rc.f_aq_strength / 0.7, 0, 1);
h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
{
if( h->frames.b_have_lowres )
x264_frame_init_lowres( h, fenc );
+ if( h->param.rc.i_aq_mode )
+ x264_adaptive_quant_frame( h, fenc );
+
if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
{
/* Nothing yet to encode */
int bframes; /* # consecutive B-frames before this P-frame */
int bframe_bits; /* total cost of those frames */
- /* AQ stuff */
- float aq_threshold;
- int *ac_energy;
-
int i_zones;
x264_zone_t *zones;
x264_zone_t *prev_zone;
}
// Find the total AC energy of the block in all planes.
-static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
+static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
{
/* This function contains annoying hacks because GCC has a habit of reordering emms
* and putting it after floating point ops. As a result, we put the emms at the end of the
* function and make sure that its always called before the float math. Noinline makes
* sure no reordering goes on. */
- /* FIXME: This array is larger than necessary because a bug in GCC causes an all-zero
- * array to be placed in .bss despite .bss not being correctly aligned on some platforms (win32?) */
- DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
unsigned int var=0, sad, i;
- if( satd || h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
+ for( i=0; i<3; i++ ) /* one pass per plane of the frame passed in by the caller */
{
- for( i=0; i<3; i++ )
- {
- int w = i ? 8 : 16;
- int stride = h->fenc->i_stride[i];
- int offset = h->mb.b_interlaced
- ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
- : w * (mb_x + mb_y * stride);
- int pix = i ? PIXEL_8x8 : PIXEL_16x16;
- stride <<= h->mb.b_interlaced;
- var += h->pixf.var[pix]( h->fenc->plane[i]+offset, stride, &sad );
- // SATD to represent the block's overall complexity (bit cost) for intra encoding.
- // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
- if( var && satd )
- *satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - sad/2;
- }
- var = X264_MAX(var,1);
+ int w = i ? 8 : 16; /* plane 0 uses a 16-wide block, planes 1 and 2 an 8-wide block */
+ int stride = frame->i_stride[i];
+ int offset = h->mb.b_interlaced
+ ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
+ : w * (mb_x + mb_y * stride);
+ int pix = i ? PIXEL_8x8 : PIXEL_16x16;
+ stride <<= h->mb.b_interlaced; /* doubled stride when interlaced, so alternate lines are skipped */
+ var += h->pixf.var[pix]( frame->plane[i]+offset, stride, &sad ); /* accumulate over all three planes */
}
- else var = h->rc->ac_energy[h->mb.i_mb_xy];
+ var = X264_MAX(var,1); /* never return 0: the caller takes logf() of the result */
x264_emms();
return var;
}
-static void x264_autosense_aq( x264_t *h )
+void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
{
- double total = 0;
- double n = 0;
int mb_x, mb_y;
- // FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?). Can we reuse them?
- // FIXME: Is chroma SATD necessary?
for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
{
- int satd=0;
- int energy = ac_energy_mb( h, mb_x, mb_y, &satd );
- h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
- /* Weight the energy value by the SATD value of the MB.
- * This represents the fact that the more complex blocks in a frame should
- * be weighted more when calculating the optimal threshold. This also helps
- * diminish the negative effect of large numbers of simple blocks in a frame,
- * such as in the case of a letterboxed film. */
- total += logf(energy) * satd;
- n += satd;
+ int energy = ac_energy_mb( h, mb_x, mb_y, frame );
+ /* The offset is zero where logf(energy) == 10.0; that constant was chosen to result in approximately the same overall bitrate as without AQ. */
+ float qp_adj = h->param.rc.f_aq_strength * 1.5 * (logf(energy) - 10.0);
+ frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; /* per-MB offset, read back with the same index by x264_adaptive_quant() */
}
- x264_emms();
- /* Calculate and store the threshold. */
- h->rc->aq_threshold = n ? total/n : 15;
}
/*****************************************************************************
*****************************************************************************/
void x264_adaptive_quant( x264_t *h )
{
- int energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
- /* Adjust the QP based on the AC energy of the macroblock. */
- float qp = h->rc->f_qpm;
- float qp_adj = 1.5 * (logf(energy) - h->rc->aq_threshold);
- if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
- qp_adj = x264_clip3f( qp_adj, -5, 5 );
- h->mb.i_qp = x264_clip3( qp + qp_adj * h->param.rc.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+ float qp, qp_adj;
+ x264_emms();
+ qp = h->rc->f_qpm;
+ qp_adj = h->fenc->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride];
+ h->mb.i_qp = x264_clip3( qp + qp_adj + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
/* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
* to lower the bit cost of the qp_delta. */
if( abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
h->thread[i]->rc = rc+i;
if( i )
rc[i] = rc[0];
- if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
- rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
}
return 0;
x264_free( rc->zones[i].param );
x264_free( rc->zones );
}
- for( i=0; i<h->param.i_threads; i++ )
- x264_free( rc[i].ac_energy );
x264_free( rc );
}
if( h->sh.i_type != SLICE_TYPE_B )
rc->last_non_b_pict_type = h->sh.i_type;
-
- /* Adaptive AQ thresholding algorithm. */
- if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
- /* Arbitrary value for "center" of the AQ curve.
- * Chosen so that any given value of CRF has on average similar bitrate with and without AQ. */
- h->rc->aq_threshold = logf(5000);
- else if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
- x264_autosense_aq(h);
}
static double predict_row_size( x264_t *h, int y, int qp )
int x264_ratecontrol_new ( x264_t * );
void x264_ratecontrol_delete( x264_t * );
+void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
+void x264_adaptive_quant( x264_t * );
void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
void x264_ratecontrol_start( x264_t *, int i_force_qp );
int x264_ratecontrol_slice_type( x264_t *, int i_frame );
int x264_ratecontrol_qp( x264_t * );
void x264_ratecontrol_end( x264_t *, int bits );
void x264_ratecontrol_summary( x264_t * );
-void x264_adaptive_quant( x264_t * );
void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
int x264_ratecontrol_get_estimated_size( x264_t const *);
int x264_rc_analyse_slice( x264_t *h );
int b_intra_penalty )
{
int i_score = 0;
+ /* Don't use the AQ'd scores for slicetype decision. */
+ int i_score_aq = 0;
/* Check whether we already evaluated this frame
* If we have tried this frame as P, then we have also tried
if( p1 != p0 )
dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
+ if( h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+ {
+ /* Frames this small have no interior MBs, so every MB contributes to both the
+ * plain score and the AQ-weighted score; otherwise i_cost_est_aq would stay 0. */
+ for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+ for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+ {
+ int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+ int i_mb_cost_aq = i_mb_cost;
+ if( h->param.rc.i_aq_mode )
+ {
+ x264_emms();
+ i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
+ }
+ i_score += i_mb_cost;
+ i_score_aq += i_mb_cost_aq;
+ }
+ }
/* the edge mbs seem to reduce the predictive quality of the
* whole frame's score, but are needed for a spatial distribution. */
- if( h->param.rc.i_vbv_buffer_size )
+ else if( h->param.rc.i_vbv_buffer_size )
{
for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
{
for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
{
int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
- row_satd[ h->mb.i_mb_y ] += i_mb_cost;
+ int i_mb_cost_aq = i_mb_cost;
+ if( h->param.rc.i_aq_mode )
+ {
+ x264_emms();
+ i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
+ }
+ row_satd[ h->mb.i_mb_y ] += i_mb_cost_aq;
if( h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 )
{
+ /* Don't use AQ-weighted costs for slicetype decision, only for ratecontrol. */
i_score += i_mb_cost;
+ i_score_aq += i_mb_cost_aq;
}
}
}
}
- else if( h->sps->i_mb_width > 2 && h->sps->i_mb_height > 2 )
+ else
{
for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ )
for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ )
- i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
- }
- else
- {
- for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
- for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
- i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+ {
+ int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
+ int i_mb_cost_aq = i_mb_cost;
+ if( h->param.rc.i_aq_mode )
+ {
+ x264_emms();
+ i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
+ }
+ i_score += i_mb_cost;
+ i_score_aq += i_mb_cost_aq;
+ }
}
-
if( b != p1 )
i_score = i_score * 100 / (120 + h->param.i_bframe_bias);
frames[b]->i_cost_est[b-p0][p1-b] = i_score;
+ frames[b]->i_cost_est_aq[b-p0][p1-b] = i_score_aq;
// fprintf( stderr, "frm %d %c(%d,%d): %6d %6d imb:%d \n", frames[b]->i_frame,
// (p1==0?'I':b<p1?'B':'P'), b-p0, p1-b, i_score, frames[b]->i_cost_est[0][0], frames[b]->i_intra_mbs[b-p0] );
x264_emms();
frames[b] = h->fenc;
cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
+
+ /* In AQ, ratecontrol uses the AQ-weighted score stored in i_cost_est_aq,
+ * not the unweighted slicetype score returned above. */
+ if( h->param.rc.i_aq_mode )
+ cost = frames[b]->i_cost_est_aq[b-p0][p1-b];
+
h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
h->fdec->i_satd = cost;
H0( " --ipratio <float> QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor );
H0( " --pbratio <float> QP factor between P and B [%.2f]\n", defaults->rc.f_pb_factor );
H1( " --chroma-qp-offset <integer> QP difference between chroma and luma [%d]\n", defaults->analyse.i_chroma_qp_offset );
- H0( " --aq-mode <integer> How AQ distributes bits [%d]\n"
+ H1( " --aq-mode <integer> AQ method [%d]\n"
" - 0: Disabled\n"
- " - 1: Avoid moving bits between frames\n"
- " - 2: Move bits between frames\n", defaults->rc.i_aq_mode );
+ " - 1: Variance AQ (complexity mask)\n", defaults->rc.i_aq_mode );
H0( " --aq-strength <float> Reduces blocking and blurring in flat and\n"
" textured areas. [%.1f]\n"
" - 0.5: weak AQ\n"
#include <stdarg.h>
-#define X264_BUILD 61
+#define X264_BUILD 62
/* x264_t:
* opaque handler for encoder */
#define X264_RC_CRF 1
#define X264_RC_ABR 2
#define X264_AQ_NONE 0
-#define X264_AQ_LOCAL 1
-#define X264_AQ_GLOBAL 2
+#define X264_AQ_VARIANCE 1
static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };