* because sum(abs(diff)) >= abs(diff(sum)). */
uint16_t *sums_base = m->integral;
/* due to a GCC bug on some platforms (win32?), zero[] may not actually be aligned.
- * unlike the similar case in ratecontrol.c, this is not a problem because it is not used for any
- * SSE instructions and the only loss is a tiny bit of performance. */
+ * this is not a problem because it is not used for any SSE instructions. */
DECLARE_ALIGNED_16( static uint8_t zero[8*FENC_STRIDE] );
DECLARE_ALIGNED_16( int enc_dc[4] );
int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
}
// Find the total AC energy of the block in all planes.
-static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
+static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
{
/* This function contains annoying hacks because GCC has a habit of reordering emms
* and putting it after floating point ops. As a result, we put the emms at the end of the
 * function and make sure that it's always called before the float math. Noinline makes
* sure no reordering goes on. */
- unsigned int var = 0, i;
+ uint32_t var = 0, i;
for( i = 0; i < 3; i++ )
{
int w = i ? 8 : 16;
stride <<= h->mb.b_interlaced;
var += h->pixf.var[pix]( frame->plane[i]+offset, stride );
}
- var = X264_MAX(var,1);
x264_emms();
return var;
}
177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
};
-static int x264_exp2fix8( float x )
+/* Approximate log2 of a nonzero 32-bit integer. x264_clz gives the number of
+ * leading zeros, so (31 - lz) is the integer part of log2(x). Shifting x left
+ * by lz normalizes it (top bit set); >>24 keeps the top 8 bits and &0x7f drops
+ * the implicit leading 1, yielding a 7-bit index (0..127) into log2_lut for
+ * the fractional part. NOTE(review): assumes log2_lut holds 128 fractional
+ * log2 entries and that x != 0 (x264_clz(0) would be undefined) — confirm at
+ * all call sites. */
+static ALWAYS_INLINE float x264_log2( uint32_t x )
+{
+ int lz = x264_clz( x );
+ return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
+}
+
+static ALWAYS_INLINE int x264_exp2fix8( float x )
{
int i, f;
x += 8;
{
/* constants chosen to result in approximately the same overall bitrate as without AQ.
* FIXME: while they're written in 5 significant digits, they're only tuned to 2. */
- float strength = h->param.rc.f_aq_strength * 1.0397;
int mb_x, mb_y;
+ float strength;
+ float avg_adj = 0.f;
+ if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
+ {
+ for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
+ for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
+ {
+ uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
+ float qp_adj = x264_log2( energy + 2 );
+ qp_adj *= qp_adj;
+ frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
+ avg_adj += qp_adj;
+ }
+ avg_adj /= h->mb.i_mb_count;
+ strength = h->param.rc.f_aq_strength * avg_adj * (1.f / 6000.f);
+ }
+ else
+ strength = h->param.rc.f_aq_strength * 1.0397f;
for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
{
- uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
- int lz = x264_clz( energy );
- float qp_adj = strength * (log2_lut[(energy<<lz>>24)&0x7f] - lz + 16.573f);
+ float qp_adj;
+ if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
+ {
+ qp_adj = frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride];
+ qp_adj = strength * (qp_adj - avg_adj);
+ }
+ else
+ {
+ uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
+ qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
+ }
frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
if( h->frames.b_have_lowres )
frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));
H1( " --chroma-qp-offset <integer> QP difference between chroma and luma [%d]\n", defaults->analyse.i_chroma_qp_offset );
H1( " --aq-mode <integer> AQ method [%d]\n"
" - 0: Disabled\n"
- " - 1: Variance AQ (complexity mask)\n", defaults->rc.i_aq_mode );
+ " - 1: Variance AQ (complexity mask)\n"
+ " - 2: Auto-variance AQ (experimental)\n", defaults->rc.i_aq_mode );
H0( " --aq-strength <float> Reduces blocking and blurring in flat and\n"
" textured areas. [%.1f]\n"
" - 0.5: weak AQ\n"
else if( !strcasecmp( optarg, "psnr" ) )
{
param->analyse.f_psy_rd = 0;
- param->rc.i_aq_mode = 0;
+ param->rc.i_aq_mode = X264_AQ_NONE;
}
else if( !strcasecmp( optarg, "ssim" ) )
{
param->analyse.f_psy_rd = 0;
+ param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
}
else if( !strcasecmp( optarg, "touhou" ) )
{