Factor out pow() so that it is called only once per macroblock. This considerably speeds up b-adapt, especially b-adapt 2.
The speed boost is as high as 24% with b-adapt 2 + b-frames 16.
CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
if( h->param.rc.i_aq_mode )
+ {
CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+ if( h->frames.b_have_lowres )
+ CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
+ }
x264_pthread_mutex_init( &frame->mutex, NULL );
x264_pthread_cond_init( &frame->cv, NULL );
float *f_qp_offset;
int b_intra_calculated;
uint16_t *i_intra_cost;
+ uint16_t *i_inv_qscale_factor;
/* threading */
int i_lines_completed; /* in pixels */
h->mb.i_psy_trellis = 0;
h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 1 );
- if( h->param.rc.f_aq_strength <= 0 )
+ h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
+ if( h->param.rc.f_aq_strength == 0 )
h->param.rc.i_aq_mode = 0;
h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
/* 10 constant chosen to result in approximately the same overall bitrate as without AQ. */
float qp_adj = h->param.rc.f_aq_strength * 1.5 * (logf(energy) - 10.0);
frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
+ if( h->frames.b_have_lowres )
+ frame->i_inv_qscale_factor[mb_x+mb_y*h->mb.i_mb_stride] = FIX8(pow(2.0,-qp_adj/6.0));
}
}
int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search );
int i_mb_cost_aq = i_mb_cost;
if( h->param.rc.i_aq_mode )
- {
- x264_emms();
- i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
- }
+ i_mb_cost_aq = (i_mb_cost_aq * frames[b]->i_inv_qscale_factor[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride] + 128) >> 8;
row_satd[ h->mb.i_mb_y ] += i_mb_cost_aq;
if( h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 )
int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search );
int i_mb_cost_aq = i_mb_cost;
if( h->param.rc.i_aq_mode )
- {
- x264_emms();
- i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
- }
+ i_mb_cost_aq = (i_mb_cost_aq * frames[b]->i_inv_qscale_factor[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride] + 128) >> 8;
i_score += i_mb_cost;
i_score_aq += i_mb_cost_aq;
}