From d8e790ca7c0a524ea0aa01bf0d9020530e3dba9a Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Thu, 9 Mar 2006 15:59:08 +0000 Subject: [PATCH] macroblock-level ratecontrol: improved vbv strictness, and improved quality when using vbv. git-svn-id: svn://svn.videolan.org/x264/trunk@464 df754926-b1dd-0310-bc7b-ec298dee348c --- common/common.h | 8 +- common/frame.c | 19 ++- common/frame.h | 10 +- encoder/cavlc.c | 16 +- encoder/encoder.c | 19 +-- encoder/ratecontrol.c | 291 +++++++++++++++++++++++++++++++---- encoder/ratecontrol.h | 1 + encoder/slicetype_decision.c | 116 ++++++++++---- 8 files changed, 401 insertions(+), 79 deletions(-) diff --git a/common/common.h b/common/common.h index 8a9207bb..b0ae118f 100644 --- a/common/common.h +++ b/common/common.h @@ -55,6 +55,10 @@ #define UNUSED #endif +#define X264_BFRAME_MAX 16 +#define X264_SLICE_MAX 4 +#define X264_NAL_MAX (4 + X264_SLICE_MAX) + /**************************************************************************** * Includes ****************************************************************************/ @@ -217,10 +221,6 @@ static const int x264_scan8[16+2*4] = 5 R R */ -#define X264_BFRAME_MAX 16 -#define X264_SLICE_MAX 4 -#define X264_NAL_MAX (4 + X264_SLICE_MAX) - typedef struct x264_ratecontrol_t x264_ratecontrol_t; typedef struct x264_vlc_table_t x264_vlc_table_t; diff --git a/common/frame.c b/common/frame.c index 098256b8..ed73bb67 100644 --- a/common/frame.c +++ b/common/frame.c @@ -29,7 +29,7 @@ x264_frame_t *x264_frame_new( x264_t *h ) { x264_frame_t *frame = x264_malloc( sizeof( x264_frame_t ) ); - int i; + int i, j; int i_mb_count = h->mb.i_mb_count; int i_stride; @@ -116,20 +116,27 @@ x264_frame_t *x264_frame_new( x264_t *h ) frame->ref[1] = NULL; } + frame->i_row_bits = x264_malloc( i_lines/16 * sizeof( int ) ); + frame->i_row_qp = x264_malloc( i_lines/16 * sizeof( int ) ); + for( i = 0; i < h->param.i_bframe + 2; i++ ) + for( j = 0; j < h->param.i_bframe + 2; j++ ) + frame->i_row_satds[i][j] = x264_malloc( i_lines/16 * sizeof( int ) ); + return frame; } void x264_frame_delete( x264_frame_t *frame ) { - int i; + int i, j; for( i = 0; i < frame->i_plane; i++ ) - { x264_free( frame->buffer[i] ); - } for( i = 4; i < 12; i++ ) /* filtered planes */ - { x264_free( frame->buffer[i] ); - } + for( i = 0; i < X264_BFRAME_MAX+2; i++ ) + for( j = 0; j < X264_BFRAME_MAX+2; j++ ) + x264_free( frame->i_row_satds[i][j] ); + x264_free( frame->i_row_bits ); + x264_free( frame->i_row_qp ); x264_free( frame->mb_type ); x264_free( frame->mv[0] ); x264_free( frame->mv[1] ); diff --git a/common/frame.h b/common/frame.h index 13b0e632..e682569a 100644 --- a/common/frame.h +++ b/common/frame.h @@ -36,6 +36,7 @@ typedef struct int i_frame; /* Presentation frame number */ int i_frame_num; /* Coded frame number */ int b_kept_as_ref; + float f_qp_avg; /* YUV buffer */ int i_plane; @@ -62,8 +63,13 @@ typedef struct /* for adaptive B-frame decision. * contains the SATD cost of the lowres frame encoded in various modes * FIXME: how big an array do we need? */ - int i_cost_est[16][16]; - int i_intra_mbs[16]; + int i_cost_est[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]; + int i_satd; // the i_cost_est of the selected frametype + int i_intra_mbs[X264_BFRAME_MAX+2]; + int *i_row_satds[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]; + int *i_row_satd; + int *i_row_bits; + int *i_row_qp; } x264_frame_t; diff --git a/encoder/cavlc.c b/encoder/cavlc.c index 3c863422..893d90df 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -254,6 +254,18 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int *l, i } } +static void cavlc_qp_delta( x264_t *h, bs_t *s ) +{ + int i_dqp = h->mb.i_qp - h->mb.i_last_qp; + if( i_dqp ) + { + i_dqp = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1); + if( i_dqp > 52 ) + i_dqp = 103 - i_dqp; + } + bs_write_ue( s, i_dqp ); +} + static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list ) { int i; @@ -676,7 +688,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) /* write residual */ if( i_mb_type == I_16x16 ) { - bs_write_se( s, h->mb.i_qp - h->mb.i_last_qp ); + cavlc_qp_delta( h, s ); /* DC Luma */ block_residual_write_cavlc( h, s, BLOCK_INDEX_LUMA_DC , h->dct.luma16x16_dc, 16 ); @@ -688,7 +700,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) } else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 ) { - bs_write_se( s, h->mb.i_qp - h->mb.i_last_qp ); + cavlc_qp_delta( h, s ); x264_macroblock_luma_write_cavlc( h, s ); } if( h->mb.i_cbp_chroma != 0 ) diff --git a/encoder/encoder.c b/encoder/encoder.c index 15aa22f4..6ed7862e 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -189,7 +189,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh, /* If effective qp <= 15, deblocking would have no effect anyway */ if( param->b_deblocking_filter && ( h->mb.b_variable_qp - || 15 < i_qp + X264_MAX(param->i_deblocking_filter_alphac0, param->i_deblocking_filter_beta) ) ) + || 15 < i_qp + 2 * X264_MAX(param->i_deblocking_filter_alphac0, param->i_deblocking_filter_beta) ) ) { sh->i_disable_deblocking_filter_idc = 0; } @@ -633,6 +633,7 @@ x264_t *x264_encoder_open ( x264_param_t *param ) param->cpu&X264_CPU_ALTIVEC ? "Altivec " : "" ); h->thread[0] = h; + h->i_thread_num = 0; for( i = 1; i < param->i_threads; i++ ) h->thread[i] = x264_malloc( sizeof(x264_t) ); @@ -1087,6 +1088,7 @@ static inline int x264_slices_write( x264_t *h ) if( h->param.i_threads == 1 ) { + x264_ratecontrol_threads_start( h ); x264_slice_write( h ); i_frame_size = h->out.nal[h->out.i_nal-1].i_payload; } @@ -1104,11 +1106,13 @@ static inline int x264_slices_write( x264_t *h ) memcpy( t, h, sizeof(x264_t) ); t->out.p_bitstream += i*i_bs_size; bs_init( &t->out.bs, t->out.p_bitstream, i_bs_size ); + t->i_thread_num = i; } t->sh.i_first_mb = (i * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width; t->sh.i_last_mb = ((i+1) * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width; t->out.i_nal = i_nal + i; } + x264_ratecontrol_threads_start( h ); /* dispatch */ #if HAVE_PTHREAD @@ -1498,24 +1502,21 @@ do_encode: /* ---------------------- Update encoder state ------------------------- */ + /* update rc */ + x264_cpu_restore( h->param.cpu ); + x264_ratecontrol_end( h, i_frame_size * 8 ); + /* handle references */ if( i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE ) - { x264_reference_update( h ); - } + x264_frame_put( h->frames.unused, h->fenc ); /* increase frame count */ h->i_frame++; /* restore CPU state (before using float again) */ - /* XXX: not needed? (done above) */ x264_cpu_restore( h->param.cpu ); - /* update rc */ - x264_ratecontrol_end( h, i_frame_size * 8 ); - - x264_frame_put( h->frames.unused, h->fenc ); - x264_noise_reduction_update( h ); TIMER_STOP( i_mtime_encode_frame ); diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index c5443726..578f2d7e 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -84,6 +84,7 @@ struct x264_ratecontrol_t /* constants */ int b_abr; int b_2pass; + int b_vbv; double fps; double bitrate; double rate_tolerance; @@ -93,7 +94,8 @@ struct x264_ratecontrol_t /* current frame */ ratecontrol_entry_t *rce; int qp; /* qp for current frame */ - float qpa; /* average of macroblocks' qp (same as qp if no adaptive quant) */ + int qpm; /* qp for current macroblock */ + float qpa; /* average of macroblocks' qp */ int slice_type; int qp_force; @@ -113,6 +115,8 @@ struct x264_ratecontrol_t double short_term_cplxsum; double short_term_cplxcount; double rate_factor_constant; + double ip_offset; + double pb_offset; /* 2pass stuff */ FILE *p_stat_file_out; @@ -134,6 +138,15 @@ struct x264_ratecontrol_t double mv_bits_sum[5]; int frame_count[5]; /* number of frames of each type */ + /* MBRC stuff */ + double frame_size_planned; + int first_row, last_row; /* region of the frame to be encoded by this thread */ + predictor_t *row_pred; + predictor_t row_preds[5]; + predictor_t pred_b_from_p; /* predict B-frame size from P-frame satd */ + int bframes; /* # consecutive B-frames before this P-frame */ + int bframe_bits; /* total cost of those frames */ + int i_zones; x264_zone_t *zones; }; @@ -143,6 +156,8 @@ static int parse_zones( x264_t *h ); static int init_pass2(x264_t *); static float rate_estimate_qscale( x264_t *h, int pict_type ); static void update_vbv( x264_t *h, int bits ); +static double predict_size( predictor_t *p, double q, double var ); +static void update_predictor( predictor_t *p, double q, double var, double bits ); int x264_rc_analyse_slice( x264_t *h ); /* Terminology: @@ -179,12 +194,11 @@ int x264_ratecontrol_new( x264_t *h ) x264_cpu_restore( h->param.cpu ); - h->rc = rc = x264_malloc( sizeof( x264_ratecontrol_t ) ); - memset(rc, 0, sizeof(*rc)); + h->rc = rc = x264_malloc( h->param.i_threads * sizeof(x264_ratecontrol_t) ); + memset( rc, 0, h->param.i_threads * sizeof(x264_ratecontrol_t) ); rc->b_abr = ( h->param.rc.b_cbr || h->param.rc.i_rf_constant ) && !h->param.rc.b_stat_read; rc->b_2pass = h->param.rc.b_cbr && h->param.rc.b_stat_read; - h->mb.b_variable_qp = 0; /* FIXME: use integers */ if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0) @@ -217,8 +231,8 @@ int x264_ratecontrol_new( x264_t *h ) else if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 ) { - if( h->param.rc.i_vbv_buffer_size < 10 * h->param.rc.i_vbv_max_bitrate / rc->fps ) { - h->param.rc.i_vbv_buffer_size = 10 * h->param.rc.i_vbv_max_bitrate / rc->fps; + if( h->param.rc.i_vbv_buffer_size < 3 * h->param.rc.i_vbv_max_bitrate / rc->fps ) { + h->param.rc.i_vbv_buffer_size = 3 * h->param.rc.i_vbv_max_bitrate / rc->fps; x264_log( h, X264_LOG_ERROR, "VBV buffer size too small, using %d kbit\n", h->param.rc.i_vbv_buffer_size ); } @@ -227,6 +241,7 @@ int x264_ratecontrol_new( x264_t *h ) rc->buffer_fill = rc->buffer_size * h->param.rc.f_vbv_buffer_init; rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate); + rc->b_vbv = 1; } else if( h->param.rc.i_vbv_max_bitrate ) x264_log(h, X264_LOG_ERROR, "VBV maxrate specified, but no bufsize.\n"); @@ -235,6 +250,8 @@ int x264_ratecontrol_new( x264_t *h ) rc->rate_tolerance = 0.01; } + h->mb.b_variable_qp = rc->b_vbv; + if( rc->b_abr ) { /* FIXME shouldn't need to arbitrarily specify a QP, @@ -254,9 +271,11 @@ int x264_ratecontrol_new( x264_t *h ) / qp2qscale( h->param.rc.i_rf_constant ); } + rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0); + rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0); rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant; - rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 ); - rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 ); + rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 ); + rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 ); rc->lstep = exp2f(h->param.rc.i_qp_step / 6.0); rc->last_qscale = qp2qscale(26); @@ -268,7 +287,11 @@ int x264_ratecontrol_new( x264_t *h ) rc->pred[i].coeff= 2.0; rc->pred[i].count= 1.0; rc->pred[i].decay= 0.5; + rc->row_preds[i].coeff= .25; + rc->row_preds[i].count= 1.0; + rc->row_preds[i].decay= 0.5; } + rc->pred_b_from_p = rc->pred[0]; if( parse_zones( h ) < 0 ) return -1; @@ -552,19 +575,34 @@ void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp ) } } + if( h->fdec->i_row_bits ) + { + memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) ); + } + + if( i_slice_type != SLICE_TYPE_B ) + { + rc->bframe_bits = 0; + rc->bframes = 0; + while( h->frames.current[rc->bframes] && IS_X264_TYPE_B(h->frames.current[rc->bframes]->i_type) ) + rc->bframes++; + } + + rc->qpa = 0; + if( i_force_qp ) { - rc->qpa = rc->qp = i_force_qp - 1; + rc->qpm = rc->qp = i_force_qp - 1; } else if( rc->b_abr ) { - rc->qpa = rc->qp = + rc->qpm = rc->qp = x264_clip3( (int)(qscale2qp( rate_estimate_qscale( h, i_slice_type ) ) + .5), 0, 51 ); } else if( rc->b_2pass ) { rce->new_qscale = rate_estimate_qscale( h, i_slice_type ); - rc->qpa = rc->qp = rce->new_qp = + rc->qpm = rc->qp = rce->new_qp = x264_clip3( (int)(qscale2qp(rce->new_qscale) + 0.5), 0, 51 ); } else /* CQP */ @@ -574,18 +612,103 @@ void x264_ratecontrol_start( x264_t *h, int i_slice_type, int i_force_qp ) q = ( rc->qp_constant[ SLICE_TYPE_B ] + rc->qp_constant[ SLICE_TYPE_P ] ) / 2; else q = rc->qp_constant[ i_slice_type ]; - rc->qpa = rc->qp = q; + rc->qpm = rc->qp = q; } } +double predict_row_size( x264_t *h, int y, int qp ) +{ + /* average between two predictors: + * absolute SATD, and scaled bit cost of the colocated row in the previous frame */ + x264_ratecontrol_t *rc = h->rc; + double pred_s = predict_size( rc->row_pred, qp2qscale(qp), h->fdec->i_row_satd[y] ); + double pred_t = 0; + if( rc->slice_type != SLICE_TYPE_I + && h->fref0[0]->i_type == h->fdec->i_type + && h->fref0[0]->i_row_satd[y] > 0 ) + { + pred_t = h->fref0[0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref0[0]->i_row_satd[y] + * qp2qscale(h->fref0[0]->i_row_qp[y]) / qp2qscale(qp); + } + if( pred_t == 0 ) + pred_t = pred_s; + + return (pred_s + pred_t) / 2; +} + +double predict_row_size_sum( x264_t *h, int y, int qp ) +{ + int i; + double bits = 0; + for( i = h->rc->first_row; i <= y; i++ ) + bits += h->fdec->i_row_bits[i]; + for( i = y+1; i <= h->rc->last_row; i++ ) + bits += predict_row_size( h, i, qp ); + return bits; +} + void x264_ratecontrol_mb( x264_t *h, int bits ) { - /* currently no adaptive quant */ + x264_ratecontrol_t *rc = h->rc; + const int y = h->mb.i_mb_y; + + x264_cpu_restore( h->param.cpu ); + + h->fdec->i_row_bits[y] += bits; + rc->qpa += rc->qpm; + + if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv ) + return; + + h->fdec->i_row_qp[y] = rc->qpm; + + if( rc->slice_type == SLICE_TYPE_B ) + { + /* B-frames shouldn't use lower QP than their reference frames */ + if( y < rc->last_row ) + { + rc->qpm = X264_MAX( rc->qp, + X264_MIN( h->fref0[0]->i_row_qp[y+1], + h->fref1[0]->i_row_qp[y+1] )); + } + } + else + { + update_predictor( rc->row_pred, qp2qscale(rc->qpm), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] ); + + /* tweak quality based on difference from predicted size */ + if( y < rc->last_row && h->stat.i_slice_count[rc->slice_type] > 0 ) + { + int prev_row_qp = h->fdec->i_row_qp[y]; + int b0 = predict_row_size_sum( h, y, rc->qpm ); + int b1 = b0; + int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max ); + int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min ); + float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned; + + while( rc->qpm < i_qp_max + && (b1 > rc->frame_size_planned * 1.15 + || (rc->buffer_fill - b1 < buffer_left_planned * 0.5))) + { + rc->qpm ++; + b1 = predict_row_size_sum( h, y, rc->qpm ); + } + + while( rc->qpm > i_qp_min + && buffer_left_planned > rc->buffer_size * 0.4 + && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp) + || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) ) + { + rc->qpm --; + b1 = predict_row_size_sum( h, y, rc->qpm ); + } + } + } } int x264_ratecontrol_qp( x264_t *h ) { - return h->rc->qp; + return h->rc->qpm; } /* In 2pass, force the same frame types as in the 1st pass */ @@ -654,6 +777,16 @@ void x264_ratecontrol_end( x264_t *h, int bits ) for( i = B_DIRECT; i < B_8x8; i++ ) h->stat.frame.i_mb_count_p += mbs[i]; + if( rc->b_vbv ) + { + for( i = 1; i < h->param.i_threads; i++ ) + rc->qpa += rc[i].qpa; + rc->qpa /= h->mb.i_mb_count; + } + else + rc->qpa = rc->qp; + h->fdec->f_qp_avg = rc->qpa; + if( h->param.rc.b_stat_write ) { char c_type = rc->slice_type==SLICE_TYPE_I ? (h->fenc->i_poc==0 ? 'I' : 'i') @@ -667,7 +800,7 @@ void x264_ratecontrol_end( x264_t *h, int bits ) : '-'; fprintf( rc->p_stat_file_out, "in:%d out:%d type:%c q:%.2f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n", - h->fenc->i_frame, h->i_frame-1, + h->fenc->i_frame, h->i_frame, c_type, rc->qpa, h->stat.frame.i_itex_bits, h->stat.frame.i_ptex_bits, h->stat.frame.i_hdr_bits, h->stat.frame.i_misc_bits, @@ -705,6 +838,24 @@ void x264_ratecontrol_end( x264_t *h, int bits ) rc->expected_bits_sum += qscale2bits( rc->rce, qp2qscale(rc->rce->new_qp) ); } + if( rc->b_vbv ) + { + if( rc->slice_type == SLICE_TYPE_B ) + { + rc->bframe_bits += bits; + if( !h->frames.current[0] || !IS_X264_TYPE_B(h->frames.current[0]->i_type) ) + update_predictor( &rc->pred_b_from_p, qp2qscale(rc->qpa), h->fref1[0]->i_satd, rc->bframe_bits / rc->bframes ); + } + else + { + /* Update row predictor based on data collected by other threads. */ + int y; + for( y = rc->last_row+1; y < h->sps->i_mb_height; y++ ) + update_predictor( rc->row_pred, qp2qscale(h->fdec->i_row_qp[y]), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] ); + rc->row_preds[rc->slice_type] = *rc->row_pred; + } + } + update_vbv( h, bits ); if( rc->slice_type != SLICE_TYPE_B ) @@ -891,16 +1042,17 @@ static void update_predictor( predictor_t *p, double q, double var, double bits static void update_vbv( x264_t *h, int bits ) { x264_ratecontrol_t *rcc = h->rc; - if( !rcc->buffer_size ) + + if( rcc->last_satd >= h->mb.i_mb_count ) + update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits ); + + if( !rcc->b_vbv ) return; rcc->buffer_fill += rcc->buffer_rate - bits; if( rcc->buffer_fill < 0 && !rcc->b_2pass ) x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rcc->buffer_fill ); rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size ); - - if(rcc->last_satd > 100) - update_predictor( &rcc->pred[rcc->slice_type], qp2qscale(rcc->qpa), rcc->last_satd, bits ); } // apply VBV constraints and clip qscale to between lmin and lmax @@ -915,17 +1067,18 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) * since they are controlled by the P-frames' QPs. * FIXME: in 2pass we could modify previous frames' QP too, * instead of waiting for the buffer to fill */ - if( rcc->buffer_size && + if( rcc->b_vbv && ( pict_type == SLICE_TYPE_P || ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) ) { if( rcc->buffer_fill/rcc->buffer_size < 0.5 ) q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 ); } - /* Now a hard threshold to make sure the frame fits in VBV. - * This one is mostly for I-frames. */ - if( rcc->buffer_size && rcc->last_satd > 0 ) + + if( rcc->b_vbv && rcc->last_satd > 0 ) { + /* Now a hard threshold to make sure the frame fits in VBV. + * This one is mostly for I-frames. */ double bits = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd ); double qf = 1.0; if( bits > rcc->buffer_fill/2 ) @@ -935,6 +1088,28 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) if( bits < rcc->buffer_rate/2 ) q *= bits*2/rcc->buffer_rate; q = X264_MAX( q0, q ); + + /* Check B-frame complexity, and use up any bits that would + * overflow before the next P-frame. */ + if( rcc->slice_type == SLICE_TYPE_P ) + { + int nb = rcc->bframes; + double pbbits = bits; + double bbits = predict_size( &rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd ); + double space; + + if( bbits > rcc->buffer_rate ) + nb = 0; + pbbits += nb * bbits; + + space = rcc->buffer_fill + (1+nb)*rcc->buffer_rate - rcc->buffer_size; + if( pbbits < space ) + { + q *= X264_MAX( pbbits / space, + bits / (0.5 * rcc->buffer_size) ); + } + q = X264_MAX( q0-5, q ); + } } if(lmin==lmax) @@ -976,12 +1151,37 @@ static float rate_estimate_qscale(x264_t *h, int pict_type) if( pict_type == SLICE_TYPE_B ) { - rcc->last_satd = 0; + /* B-frames don't have independent ratecontrol, but rather get the + * average QP of the two adjacent P-frames + an offset */ + + int i0 = IS_X264_TYPE_I(h->fref0[0]->i_type); + int i1 = IS_X264_TYPE_I(h->fref1[0]->i_type); + int dt0 = abs(h->fenc->i_poc - h->fref0[0]->i_poc); + int dt1 = abs(h->fenc->i_poc - h->fref1[0]->i_poc); + float q0 = h->fref0[0]->f_qp_avg; + float q1 = h->fref1[0]->f_qp_avg; + + if( h->fref0[0]->i_type == X264_TYPE_BREF ) + q0 -= rcc->pb_offset/2; + if( h->fref1[0]->i_type == X264_TYPE_BREF ) + q1 -= rcc->pb_offset/2; + + if(i0 && i1) + q = (q0 + q1) / 2 + rcc->ip_offset; + else if(i0) + q = q1; + else if(i1) + q = q0; + else + q = (q0*dt1 + q1*dt0) / (dt0 + dt1); + if(h->fenc->b_kept_as_ref) - q = rcc->last_qscale * sqrtf(h->param.rc.f_pb_factor); + q += rcc->pb_offset/2; else - q = rcc->last_qscale * h->param.rc.f_pb_factor; - return x264_clip3f(q, lmin, lmax); + q += rcc->pb_offset; + + rcc->last_satd = 0; + return qp2qscale(q); } else { @@ -1082,10 +1282,47 @@ static float rate_estimate_qscale(x264_t *h, int pict_type) rcc->last_qscale_for[pict_type] = rcc->last_qscale = q; + rcc->frame_size_planned = predict_size( &rcc->pred[rcc->slice_type], q, rcc->last_satd ); + return q; } } +/* Distribute bits among the slices, proportional to their estimated complexity */ +void x264_ratecontrol_threads_start( x264_t *h ) +{ + x264_ratecontrol_t *rc = h->rc; + int t, y; + double den = 0; + double frame_size_planned = rc->frame_size_planned; + + for( t = 0; t < h->param.i_threads; t++ ) + { + h->thread[t]->rc = &rc[t]; + if( t > 0 ) + rc[t] = rc[0]; + } + + if( !rc->b_vbv || rc->slice_type == SLICE_TYPE_B ) + return; + + for( t = 0; t < h->param.i_threads; t++ ) + { + rc[t].first_row = h->thread[t]->sh.i_first_mb / h->sps->i_mb_width; + rc[t].last_row = (h->thread[t]->sh.i_last_mb-1) / h->sps->i_mb_width; + rc[t].frame_size_planned = 1; + rc[t].row_pred = &rc[t].row_preds[rc->slice_type]; + if( h->param.i_threads > 1 ) + { + for( y = rc[t].first_row; y<= rc[t].last_row; y++ ) + rc[t].frame_size_planned += predict_row_size( h, y, qscale2qp(rc[t].qp) ); + } + den += rc[t].frame_size_planned; + } + for( t = 0; t < h->param.i_threads; t++ ) + rc[t].frame_size_planned *= frame_size_planned / den; +} + static int init_pass2( x264_t *h ) { x264_ratecontrol_t *rcc = h->rc; diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h index ceb50830..a18c4922 100644 --- a/encoder/ratecontrol.h +++ b/encoder/ratecontrol.h @@ -28,6 +28,7 @@ int x264_ratecontrol_new ( x264_t * ); void x264_ratecontrol_delete( x264_t * ); void x264_ratecontrol_start( x264_t *, int i_slice_type, int i_force_qp ); +void x264_ratecontrol_threads_start( x264_t * ); int x264_ratecontrol_slice_type( x264_t *, int i_frame ); void x264_ratecontrol_mb( x264_t *, int bits ); int x264_ratecontrol_qp( x264_t * ); diff --git a/encoder/slicetype_decision.c b/encoder/slicetype_decision.c index f7dc52da..bc3effa5 100644 --- a/encoder/slicetype_decision.c +++ b/encoder/slicetype_decision.c @@ -57,7 +57,6 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, uint8_t pix1[9*9], pix2[8*8]; x264_me_t m[2]; - int mvc[4][2], i_mvc; int i_bcost = COST_MAX; int i_cost_bak; int l, i; @@ -73,7 +72,7 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, h->mb.mv_max_fpel[0] = 8*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 4; h->mb.mv_min_spel[0] = 4*( h->mb.mv_min_fpel[0] - 8 ); h->mb.mv_max_spel[0] = 4*( h->mb.mv_max_fpel[0] + 8 ); - if( h->mb.i_mb_x <= 1) + if( h->mb.i_mb_x <= 1 ) { h->mb.mv_min_fpel[1] = -8*h->mb.i_mb_y - 4; h->mb.mv_max_fpel[1] = 8*( h->sps->i_mb_height - h->mb.i_mb_y - 1 ) + 4; @@ -154,18 +153,35 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, i_cost_bak = i_bcost; for( l = 0; l < 1 + b_bidir; l++ ) { + int mvc[4][2] = {{0}}, i_mvc; int16_t (*fenc_mv)[2] = &fenc->mv[l][i_mb_xy]; - mvc[0][0] = fenc_mv[-1][0]; - mvc[0][1] = fenc_mv[-1][1]; - mvc[1][0] = fenc_mv[-i_mb_stride][0]; - mvc[1][1] = fenc_mv[-i_mb_stride][1]; - mvc[2][0] = fenc_mv[-i_mb_stride+1][0]; - mvc[2][1] = fenc_mv[-i_mb_stride+1][1]; - mvc[3][0] = fenc_mv[-i_mb_stride-1][0]; - mvc[3][1] = fenc_mv[-i_mb_stride-1][1]; + i_mvc = 0; + if( i_mb_x > 0 ) + { + mvc[i_mvc][0] = fenc_mv[-1][0]; + mvc[i_mvc][1] = fenc_mv[-1][1]; + i_mvc++; + } + if( i_mb_y > 0 ) + { + mvc[i_mvc][0] = fenc_mv[-i_mb_stride][0]; + mvc[i_mvc][1] = fenc_mv[-i_mb_stride][1]; + i_mvc++; + if( i_mb_x < h->sps->i_mb_width - 1 ) + { + mvc[i_mvc][0] = fenc_mv[-i_mb_stride+1][0]; + mvc[i_mvc][1] = fenc_mv[-i_mb_stride+1][1]; + i_mvc++; + } + if( i_mb_x > 0 ) + { + mvc[i_mvc][0] = fenc_mv[-i_mb_stride-1][0]; + mvc[i_mvc][1] = fenc_mv[-i_mb_stride-1][1]; + i_mvc++; + } + } m[l].mvp[0] = x264_median( mvc[0][0], mvc[1][0], mvc[2][0] ); m[l].mvp[1] = x264_median( mvc[0][1], mvc[1][1], mvc[2][1] ); - i_mvc = 4; x264_me_search( h, &m[l], mvc, i_mvc ); @@ -200,8 +216,12 @@ lowres_intra_mb: } if( i_bcost != i_cost_bak ) { - if( !b_bidir ) + if( !b_bidir + && i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1 + && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1 ) + { fenc->i_intra_mbs[b-p0]++; + } if( p1 > p0+1 ) i_bcost = i_bcost * 9 / 8; // arbitray penalty for I-blocks in and after B-frames } @@ -217,6 +237,7 @@ int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, { int i_score = 0; int dist_scale_factor = 128; + int *row_satd = frames[b]->i_row_satds[b-p0][p1-b]; /* Check whether we already evaluated this frame * If we have tried this frame as P, then we have also tried @@ -235,10 +256,31 @@ int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, if( p1 != p0 ) dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); - /* Skip the outermost ring of macroblocks, to simplify mv range and intra prediction. */ - for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ ) - for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ ) - i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor ); + /* the edge mbs seem to reduce the predictive quality of the + * whole frame's score, but are needed for a spatial distribution. */ + if( h->param.rc.i_vbv_buffer_size ) + { + for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ ) + { + row_satd[ h->mb.i_mb_y ] = 0; + for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ ) + { + int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor ); + row_satd[ h->mb.i_mb_y ] += i_mb_cost; + if( h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 && + h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 ) + { + i_score += i_mb_cost; + } + } + } + } + else + { + for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ ) + for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ ) + i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor ); + } if( b != p1 ) i_score = i_score * 100 / (120 + h->param.i_bframe_bias); @@ -376,22 +418,38 @@ void x264_slicetype_decide( x264_t *h ) int x264_rc_analyse_slice( x264_t *h ) { - int p1 = 0; x264_mb_analysis_t a; x264_frame_t *frames[X264_BFRAME_MAX+2] = { NULL, }; + int p0=0, p1, b; + int cost; - if( IS_X264_TYPE_I(h->fenc->i_type) ) - return x264_slicetype_frame_cost( h, &a, &h->fenc, 0, 0, 0 ); - - while( h->frames.current[p1] && IS_X264_TYPE_B( h->frames.current[p1]->i_type ) ) - p1++; - p1++; - if( h->fenc->i_cost_est[p1][0] >= 0 ) - return h->fenc->i_cost_est[p1][0]; - - frames[0] = h->fref0[0]; - frames[p1] = h->fenc; x264_lowres_context_init( h, &a ); - return x264_slicetype_frame_cost( h, &a, frames, 0, p1, p1 ); + if( IS_X264_TYPE_I(h->fenc->i_type) ) + { + p1 = b = 0; + } + else if( X264_TYPE_P == h->fenc->i_type ) + { + p1 = 0; + while( h->frames.current[p1] && IS_X264_TYPE_B( h->frames.current[p1]->i_type ) ) + p1++; + p1++; + b = p1; + } + else //B + { + p1 = (h->fref1[0]->i_poc - h->fref0[0]->i_poc)/2; + b = (h->fref1[0]->i_poc - h->fenc->i_poc)/2; + frames[p1] = h->fref1[0]; + } + frames[p0] = h->fref0[0]; + frames[b] = h->fenc; + + cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b ); + h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b]; + h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b]; + h->fdec->i_satd = cost; + memcpy( h->fdec->i_row_satd, h->fenc->i_row_satd, h->sps->i_mb_height * sizeof(int) ); + return cost; } -- 2.40.0