From 0c841de6810678f3da1c06a34595cb490d59eeb6 Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Sat, 8 Nov 2008 20:16:17 -0800 Subject: [PATCH] Faster b-adapt + adaptive quantization Factor out pow to be only called once per macroblock. Speeds up b-adapt, especially b-adapt 2, considerably. Speed boost is as high as 24% with b-adapt 2 + b-frames 16. --- common/frame.c | 4 ++++ common/frame.h | 1 + encoder/encoder.c | 3 ++- encoder/ratecontrol.c | 2 ++ encoder/slicetype.c | 10 ++-------- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/common/frame.c b/common/frame.c index 26c56407..85bbcc0d 100644 --- a/common/frame.c +++ b/common/frame.c @@ -124,7 +124,11 @@ x264_frame_t *x264_frame_new( x264_t *h ) CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) ); if( h->param.rc.i_aq_mode ) + { CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) ); + if( h->frames.b_have_lowres ) + CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) ); + } x264_pthread_mutex_init( &frame->mutex, NULL ); x264_pthread_cond_init( &frame->cv, NULL ); diff --git a/common/frame.h b/common/frame.h index 9c273824..aad77f5e 100644 --- a/common/frame.h +++ b/common/frame.h @@ -83,6 +83,7 @@ typedef struct float *f_qp_offset; int b_intra_calculated; uint16_t *i_intra_cost; + uint16_t *i_inv_qscale_factor; /* threading */ int i_lines_completed; /* in pixels */ diff --git a/encoder/encoder.c b/encoder/encoder.c index 4a9860fa..c22f7b0c 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -511,7 +511,8 @@ static int x264_validate_parameters( x264_t *h ) h->mb.i_psy_trellis = 0; h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12); h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 1 ); - if( h->param.rc.f_aq_strength <= 0 ) + h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 ); + if( h->param.rc.f_aq_strength == 0 ) h->param.rc.i_aq_mode = 0; h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 ); diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 002b79db..b0ee384a 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -201,6 +201,8 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame ) /* 10 constant chosen to result in approximately the same overall bitrate as without AQ. */ float qp_adj = h->param.rc.f_aq_strength * 1.5 * (logf(energy) - 10.0); frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; + if( h->frames.b_have_lowres ) + frame->i_inv_qscale_factor[mb_x+mb_y*h->mb.i_mb_stride] = FIX8(pow(2.0,-qp_adj/6.0)); } } diff --git a/encoder/slicetype.c b/encoder/slicetype.c index 180448e9..a8c028cf 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -306,10 +306,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search ); int i_mb_cost_aq = i_mb_cost; if( h->param.rc.i_aq_mode ) - { - x264_emms(); - i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0); - } + i_mb_cost_aq = (i_mb_cost_aq * frames[b]->i_inv_qscale_factor[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride] + 128) >> 8; row_satd[ h->mb.i_mb_y ] += i_mb_cost_aq; if( h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 && h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 ) @@ -329,10 +326,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search ); int i_mb_cost_aq = i_mb_cost; if( h->param.rc.i_aq_mode ) - { - x264_emms(); - i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0); - } + i_mb_cost_aq = (i_mb_cost_aq * frames[b]->i_inv_qscale_factor[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride] + 128) >> 8; i_score += i_mb_cost; i_score_aq += i_mb_cost_aq; } -- 2.40.0