Use the large-scale lookahead capability introduced in MB-tree for ratecontrol purposes.
(Does not require MB-tree, however.)
Greatly improved quality and compliance in 1-pass VBV mode, especially in CBR; +2 dB OPSNR or more in some cases.
Fix some other bugs in VBV, which should improve non-lookahead mode as well.
Change the tolerance algorithm in row VBV to allow for more significant mispredictions when buffer is nearly full.
Note that due to the fixing of an extremely long-standing bug (>1 year), bitrates may change by nontrivial amounts in CRF without MB-tree.
s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
- if( p->rc.b_mb_tree )
+ if( p->rc.b_mb_tree || p->rc.i_vbv_buffer_size )
s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
if( h->param.rc.i_aq_mode )
{
CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+ CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
if( h->frames.b_have_lowres )
/* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
x264_free( frame->lowres_inter_types[j][i] );
}
x264_free( frame->f_qp_offset );
+ x264_free( frame->f_qp_offset_aq );
x264_free( frame->i_inv_qscale_factor );
x264_free( frame->i_row_bits );
x264_free( frame->i_row_qp );
int *i_row_bits;
int *i_row_qp;
float *f_qp_offset;
+ float *f_qp_offset_aq;
int b_intra_calculated;
uint16_t *i_intra_cost;
uint16_t *i_propagate_cost;
uint16_t *i_inv_qscale_factor;
+ /* vbv */
+ uint8_t i_planned_type[X264_LOOKAHEAD_MAX+1];
+ int i_planned_satd[X264_LOOKAHEAD_MAX+1];
+
/* threading */
int i_lines_completed; /* in pixels */
int i_reference_count; /* number of threads using this frame (not necessarily the number of pointers) */
h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, 51 );
h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
if( h->param.rc.i_rc_method == X264_RC_CRF )
+ {
h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
+ h->param.rc.i_bitrate = 0;
+ }
if( (h->param.rc.i_rc_method == X264_RC_CQP || h->param.rc.i_rc_method == X264_RC_CRF)
&& h->param.rc.i_qp_constant == 0 )
{
h->param.analyse.b_weighted_bipred = 0;
}
h->param.rc.i_lookahead = x264_clip3( h->param.rc.i_lookahead, 0, X264_LOOKAHEAD_MAX );
- h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, h->param.i_keyint_max );
+ {
+ int maxrate = X264_MAX( h->param.rc.i_vbv_max_bitrate, h->param.rc.i_bitrate );
+ float bufsize = maxrate ? (float)h->param.rc.i_vbv_buffer_size / maxrate : 0;
+ float fps = h->param.i_fps_num > 0 && h->param.i_fps_den > 0 ? (float) h->param.i_fps_num / h->param.i_fps_den : 25.0;
+ h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_MAX( h->param.i_keyint_max, bufsize*fps ) );
+ }
+
if( h->param.rc.b_stat_read )
h->param.rc.i_lookahead = 0;
else if( !h->param.rc.i_lookahead )
h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4;
else
h->frames.i_delay = h->param.i_bframe;
- if( h->param.rc.b_mb_tree )
+ if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size )
h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
h->frames.i_delay += h->param.i_threads - 1;
h->frames.i_delay = X264_MIN( h->frames.i_delay, X264_LOOKAHEAD_MAX );
float f_qpm; /* qp for current macroblock: precise float for AQ */
float qpa_rc; /* average of macroblocks' qp before aq */
float qpa_aq; /* average of macroblocks' qp after aq */
+ float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */
int qp_force;
/* VBV stuff */
{
int mb_xy;
memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
+ memset( frame->f_qp_offset_aq, 0, h->mb.i_mb_count * sizeof(float) );
if( h->frames.b_have_lowres )
for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
frame->i_inv_qscale_factor[mb_xy] = 256;
uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
}
- frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
+ frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] =
+ frame->f_qp_offset_aq[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
if( h->frames.b_have_lowres )
frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));
}
rc->qpa_rc += rc->f_qpm;
rc->qpa_aq += h->mb.i_qp;
- if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv)
+ if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv )
return;
h->fdec->i_row_qp[y] = rc->qpm;
int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max );
int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
- float rc_tol = 1;
- float headroom = 0;
+ /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
+ float rc_tol = (buffer_left_planned / h->param.i_threads);
/* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
/* area at the top of the frame was measured inaccurately. */
if(row_bits_so_far(h,y) < 0.05 * rc->frame_size_planned)
return;
- headroom = buffer_left_planned/rc->buffer_size;
if(h->sh.i_type != SLICE_TYPE_I)
- headroom /= 2;
- rc_tol += headroom;
+ rc_tol /= 2;
if( !rc->b_vbv_min_rate )
i_qp_min = X264_MAX( i_qp_min, h->sh.i_qp );
while( rc->qpm < i_qp_max
- && (b1 > rc->frame_size_planned * rc_tol
- || (rc->buffer_fill - b1 < buffer_left_planned * 0.5)))
+ && ((b1 > rc->frame_size_planned + rc_tol) ||
+ (rc->buffer_fill - b1 < buffer_left_planned * 0.5) ||
+ (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv)) )
{
rc->qpm ++;
b1 = predict_row_size_sum( h, y, rc->qpm );
double q0 = q;
/* B-frames are not directly subject to VBV,
- * since they are controlled by the P-frames' QPs.
- * FIXME: in 2pass we could modify previous frames' QP too,
- * instead of waiting for the buffer to fill */
- if( rcc->b_vbv &&
- ( pict_type == SLICE_TYPE_P ||
- ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) )
- {
- if( rcc->buffer_fill/rcc->buffer_size < 0.5 )
- q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
- }
+ * since they are controlled by the P-frames' QPs. */
if( rcc->b_vbv && rcc->last_satd > 0 )
{
- /* Now a hard threshold to make sure the frame fits in VBV.
- * This one is mostly for I-frames. */
- double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
- double qf = 1.0;
- if( bits > rcc->buffer_fill/2 )
- qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
- q /= qf;
- bits *= qf;
- if( bits < rcc->buffer_rate/2 )
- q *= bits*2/rcc->buffer_rate;
- q = X264_MAX( q0, q );
+ /* Lookahead VBV: raise the quantizer as necessary such that no frames in
+ * the lookahead overflow and such that the buffer is in a reasonable state
+ * by the end of the lookahead. */
+ if( h->param.rc.i_lookahead )
+ {
+ int j, iterations, terminate = 0;
+
+ /* Avoid an infinite loop. */
+ for( iterations = 0; iterations < 1000 && terminate != 3; iterations++ )
+ {
+ double frame_q[3];
+ double cur_bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
+ double buffer_fill_cur = rcc->buffer_fill - cur_bits + rcc->buffer_rate;
+ double target_fill;
+ frame_q[0] = h->sh.i_type == SLICE_TYPE_I ? q * h->param.rc.f_ip_factor : q;
+ frame_q[1] = frame_q[0] * h->param.rc.f_pb_factor;
+ frame_q[2] = frame_q[0] / h->param.rc.f_ip_factor;
+
+ /* Loop over the planned future frames. */
+ for( j = 0; buffer_fill_cur >= 0 && buffer_fill_cur <= rcc->buffer_size; j++ )
+ {
+ int i_type = h->fenc->i_planned_type[j];
+ int i_satd = h->fenc->i_planned_satd[j];
+ if( i_type == X264_TYPE_AUTO )
+ break;
+ i_type = IS_X264_TYPE_I( i_type ) ? SLICE_TYPE_I : IS_X264_TYPE_B( i_type ) ? SLICE_TYPE_B : SLICE_TYPE_P;
+ cur_bits = predict_size( &rcc->pred[i_type], frame_q[i_type], i_satd );
+ buffer_fill_cur = buffer_fill_cur - cur_bits + rcc->buffer_rate;
+ }
+ /* Try to get the buffer at least 50% filled, but don't set an impossible goal. */
+ target_fill = X264_MIN( rcc->buffer_fill + j * rcc->buffer_rate * 0.5, rcc->buffer_size * 0.5 );
+ if( buffer_fill_cur < target_fill )
+ {
+ q *= 1.01;
+ terminate |= 1;
+ continue;
+ }
+ /* Try to get the buffer no more than 80% filled, but don't set an impossible goal. */
+ target_fill = x264_clip3f( rcc->buffer_fill - j * rcc->buffer_rate * 0.5, rcc->buffer_size * 0.8, rcc->buffer_size );
+ if( rcc->b_vbv_min_rate && buffer_fill_cur > target_fill )
+ {
+ q /= 1.01;
+ terminate |= 2;
+ continue;
+ }
+ break;
+ }
+ }
+ /* Fallback to old purely-reactive algorithm: no lookahead. */
+ else
+ {
+ if( ( pict_type == SLICE_TYPE_P ||
+ ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) &&
+ rcc->buffer_fill/rcc->buffer_size < 0.5 )
+ {
+ q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
+ }
+
+ /* Now a hard threshold to make sure the frame fits in VBV.
+ * This one is mostly for I-frames. */
+ double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
+ double qf = 1.0;
+ if( bits > rcc->buffer_fill/2 )
+ qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
+ q /= qf;
+ bits *= qf;
+ if( bits < rcc->buffer_rate/2 )
+ q *= bits*2/rcc->buffer_rate;
+ q = X264_MAX( q0, q );
+ }
/* Check B-frame complexity, and use up any bits that would
* overflow before the next P-frame. */
if( h->sh.i_type == SLICE_TYPE_P )
{
int nb = rcc->bframes;
+ double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
double pbbits = bits;
double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd );
double space;
{
q = qp2qscale( ABR_INIT_QP ) / fabs( h->param.rc.f_ip_factor );
}
+ rcc->qp_novbv = qscale2qp(q);
//FIXME use get_diff_limited_q() ?
q = clip_qscale( h, pict_type, q );
if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 )
rcc->last_qscale_for[SLICE_TYPE_P] = q;
- if( rcc->b_2pass && rcc->b_vbv)
+ if( rcc->b_2pass && rcc->b_vbv )
rcc->frame_size_planned = qscale2bits(&rce, q);
else
rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );
- if( !p0 && !p1 && !b )
+ if( p0 == p1 )
goto lowres_intra_mb;
// no need for h->mb.mv_min[]
/* If MB-tree changes the quantizers, we need to recalculate the frame cost without
* re-running lookahead. */
-static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
- int p0, int p1, int b )
+static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
{
int i_score = 0;
int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
return i_score;
}
+/* Finalize the per-macroblock QP offsets of `frame`.
+ *
+ * h:       encoder context; supplies the macroblock count and rc.f_qcompress.
+ * frame:   frame whose f_qp_offset[] array is rewritten.
+ * b_bidir: nonzero -> restore f_qp_offset[] to the AQ-only offsets stored in
+ *                     f_qp_offset_aq[] for every macroblock.
+ *          zero    -> for each macroblock whose qscale-weighted intra cost is
+ *                     nonzero, set f_qp_offset[] to the AQ offset lowered by
+ *                     5 * (1 - qcompress) * log2((intra + propagate) / intra).
+ *                     Macroblocks with a zero intra cost are left unchanged. */
+static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, int b_bidir )
+{
+    int mb_index;
+    /* NOTE(review): presumably resets MMX/x87 state before the float math below -- confirm. */
+    x264_emms();
+    if( b_bidir )
+    {
+        /* f_qp_offset is a float pointer, not an array, so sizeof on it would
+         * yield the pointer size and copy only the first element or two.
+         * Use the same element count the buffers are allocated with. */
+        memcpy( frame->f_qp_offset, frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+    }
+    else
+    {
+        for( mb_index = 0; mb_index < h->mb.i_mb_count; mb_index++ )
+        {
+            /* Intra cost weighted by the 8.8 fixed-point inverse qscale factor, rounded. */
+            int intra_cost = (frame->i_intra_cost[mb_index] * frame->i_inv_qscale_factor[mb_index]+128)>>8;
+            if( intra_cost )
+            {
+                int propagate_cost = frame->i_propagate_cost[mb_index];
+                float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost);
+                /* Allow the constant to be adjusted via qcompress, since the two
+                 * concepts are very similar. */
+                frame->f_qp_offset[mb_index] = frame->f_qp_offset_aq[mb_index] - 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio;
+            }
+        }
+    }
+}
+
+
static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
{
x264_frame_t *refs[2] = {frames[p0],frames[p1]};
}
}
}
+
+ if( h->param.rc.i_vbv_buffer_size )
+ x264_macroblock_tree_finish( h, frames[b], b != p1 );
}
static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
int i, idx = !b_intra;
int last_nonb, cur_nonb = 1;
if( b_intra )
- x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
+ x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
i = num_frames-1;
while( i > 0 && frames[i]->i_type == X264_TYPE_B )
}
last_nonb = cur_nonb;
}
- x264_emms();
- for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+ x264_macroblock_tree_finish( h, frames[last_nonb], 0 );
+}
+
+static int x264_vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b )
+{
+ int cost = x264_slicetype_frame_cost( h, a, frames, p0, p1, b, 0 );
+ if( h->param.rc.i_aq_mode )
{
- for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
- {
- int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
- int intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8;
+ if( h->param.rc.b_mb_tree )
+ return x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
+ else
+ return frames[b]->i_cost_est_aq[b-p0][p1-b];
+ }
+ return cost;
+}
- if( intra_cost )
- {
- int propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
- float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost);
- /* Allow the constant to be adjusted via qcompress, since the two
- * concepts are very similar. */
- frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio;
- }
+static void x264_vbv_lookahead( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int keyframe )
+{
+ int last_nonb = 0, cur_nonb = 1, next_nonb, i, idx = 0;
+ while( cur_nonb < num_frames && frames[cur_nonb]->i_type == X264_TYPE_B )
+ cur_nonb++;
+ next_nonb = keyframe ? last_nonb : cur_nonb;
+
+ while( cur_nonb <= num_frames )
+ {
+ /* P/I cost: This shouldn't include the cost of next_nonb */
+ if( next_nonb != cur_nonb )
+ {
+ int p0 = IS_X264_TYPE_I( frames[cur_nonb]->i_type ) ? cur_nonb : last_nonb;
+ frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, p0, cur_nonb, cur_nonb );
+ frames[next_nonb]->i_planned_type[idx] = frames[cur_nonb]->i_type;
+ idx++;
}
+ /* Handle the B-frames: coded order */
+ for( i = last_nonb+1; i < cur_nonb; i++, idx++ )
+ {
+ frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, last_nonb, cur_nonb, i );
+ frames[next_nonb]->i_planned_type[idx] = X264_TYPE_B;
+ }
+ last_nonb = cur_nonb;
+ cur_nonb++;
+ while( cur_nonb <= num_frames && frames[cur_nonb]->i_type == X264_TYPE_B )
+ cur_nonb++;
}
+ frames[next_nonb]->i_planned_type[idx] = X264_TYPE_AUTO;
}
static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
frames[0] = h->frames.last_nonb;
for( j = 0; h->frames.next[j] && h->frames.next[j]->i_type == X264_TYPE_AUTO; j++ )
frames[j+1] = h->frames.next[j];
- keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
- num_frames = X264_MIN( j, keyint_limit );
- if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
+ if( !j )
return;
+ keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
+ num_frames = X264_MIN( j, keyint_limit );
+
x264_lowres_context_init( h, &a );
idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
- if( num_frames == 1 && !h->param.rc.b_mb_tree )
+ /* This is important psy-wise: if we have a non-scenecut keyframe,
+ * there will be significant visual artifacts if the frames just before
+ * go down in quality due to being referenced less, despite it being
+ * more RD-optimal. */
+ if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || h->param.rc.i_vbv_buffer_size )
+ num_frames = j;
+ else if( num_frames == 1 )
{
frames[1]->i_type = X264_TYPE_P;
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
frames[1]->i_type = idr_frame_type;
return;
}
-
- /* This is important psy-wise: if we have a non-scenecut keyframe,
- * there will be significant visual artifacts if the frames just before
- * go down in quality due to being referenced less, despite it being
- * more RD-optimal. */
- if( h->param.analyse.b_psy && h->param.rc.b_mb_tree )
- num_frames = j;
+ else if( num_frames == 0 )
+ {
+ frames[1]->i_type = idr_frame_type;
+ return;
+ }
char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
int n;
num_bframes = 0;
}
+ for( j = 1; j <= num_frames; j++ )
+ if( frames[j]->i_type == X264_TYPE_AUTO )
+ frames[j]->i_type = X264_TYPE_P;
+
/* Perform the actual macroblock tree analysis.
* Don't go farther than the lookahead parameter; this helps in short GOPs. */
if( h->param.rc.b_mb_tree )
- x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.rc.i_lookahead), keyframe );
+ x264_macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.rc.i_lookahead), keyframe );
/* Enforce keyframe limit. */
- for( j = 0; j <= num_bframes; j++ )
- if( j+1 > keyint_limit )
+ for( j = 0; j < num_frames; j++ )
+ {
+ if( (j+1)%h->param.i_keyint_max > keyint_limit )
{
if( j )
frames[j]->i_type = X264_TYPE_P;
frames[j+1]->i_type = idr_frame_type;
- reset_start = j+2;
+ if( j <= num_bframes )
+ reset_start = j+2;
break;
}
+ }
+
+ if( h->param.rc.i_vbv_buffer_size )
+ x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
/* Restore frametypes for all frames that haven't actually been decided yet. */
for( j = reset_start; j <= num_frames; j++ )
frames[b] = h->fenc;
if( h->param.rc.b_mb_tree )
- cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b );
+ cost = x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
else
{
cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
/* In AQ, use the weighted score instead. */
if( h->param.rc.i_aq_mode )
- cost = frames[b]->i_cost_est[b-p0][p1-b];
+ cost = frames[b]->i_cost_est_aq[b-p0][p1-b];
}
h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];