Extremely accurate, possibly 100% so (I can't get it to fail even with difficult VBVs).
Does not yet support rows that are split across slice boundaries (which occurs often with slice-max-size/slice-max-mbs).
Still inaccurate with sliced threads, but better than before.
{
if( !b_lookahead )
{
- for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
+ for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
{
CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
h->intra_border_backup[i][j] += 16;
- if( !PARAM_INTERLACED )
- h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
}
for( int i = 0; i <= PARAM_INTERLACED; i++ )
{
{
for( int i = 0; i <= PARAM_INTERLACED; i++ )
x264_free( h->deblock_strength[i] );
- for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
+ for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
x264_free( h->intra_border_backup[i][j] - 16 );
}
? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
: 16 * mb_x + height * mb_y * i_stride;
pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
+ int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : !(mb_y&1);
pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
/* ref_pix_offset[0] references the current field and [1] the opposite field. */
h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height );
memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
- if( b_mbaff )
- {
- h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
- h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
- }
+ h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
+ h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
}
else
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 );
memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
- if( b_mbaff )
- h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
+ h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
}
- if( b_mbaff )
+ if( b_mbaff || h->mb.b_reencode_mb )
{
for( int j = 0; j < height; j++ )
if( b_chroma )
* For progressive mbs this is the bottom two rows, and for interlaced the
* bottom row of each field. We also store samples needed for the next
* mbpair in intra_border_backup[2]. */
- int backup_dst = !b_mbaff ? 0 : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
+ int backup_dst = !b_mbaff ? (mb_y&1) : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
if( CHROMA444 )
{
}
}
}
- else
- {
- /* In progressive we update intra_border_backup in-place, so the topleft neighbor will
- * no longer exist there when load_pic_pointers wants it. Move it within p_fdec instead. */
- h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[0][-FDEC_STRIDE+15];
- h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)];
- h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)];
- }
}
void x264_macroblock_cache_save( x264_t *h )
}
/* If we are within a reasonable distance of the end of the memory allocated for the bitstream, */
-/* reallocate, adding an arbitrary amount of space (100 kilobytes). */
+/* reallocate, growing the buffer by max_row_size bytes. */
static int x264_bitstream_check_buffer( x264_t *h )
{
uint8_t *bs_bak = h->out.p_bitstream;
- int max_mb_size = 2500 << SLICE_MBAFF;
- if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_mb_size)) ||
- (h->out.bs.p_end - h->out.bs.p < max_mb_size) )
+ int max_row_size = (2500 << SLICE_MBAFF) * h->mb.i_mb_width;
+ if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_row_size)) ||
+ (h->out.bs.p_end - h->out.bs.p < max_row_size) )
{
- h->out.i_bitstream += 100000;
+ h->out.i_bitstream += max_row_size;
CHECKED_MALLOC( h->out.p_bitstream, h->out.i_bitstream );
- h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - 100000) & ~15 );
+ h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - max_row_size) & ~15 );
intptr_t delta = h->out.p_bitstream - bs_bak;
h->out.bs.p_start += delta;
}
h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
- h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 0, QP_MAX );
+ h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 2, QP_MAX );
h->param.rc.i_bitrate = x264_clip3( h->param.rc.i_bitrate, 0, 2000000 );
h->param.rc.i_vbv_buffer_size = x264_clip3( h->param.rc.i_vbv_buffer_size, 0, 2000000 );
h->param.rc.i_vbv_max_bitrate = x264_clip3( h->param.rc.i_vbv_max_bitrate, 0, 2000000 );
bs_t bs;
x264_cabac_t cabac;
x264_frame_stat_t stat;
+ int last_qp;
+ int last_dqp;
+ int field_decoding_flag;
} x264_bs_bak_t;
static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full )
{
if( full )
+ {
bak->stat = h->stat.frame;
+ bak->last_qp = h->mb.i_last_qp;
+ bak->last_dqp = h->mb.i_last_dqp;
+ bak->field_decoding_flag = h->mb.field_decoding_flag;
+ }
else
{
bak->stat.i_mv_bits = h->stat.frame.i_mv_bits;
static ALWAYS_INLINE void x264_bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full )
{
if( full )
+ {
h->stat.frame = bak->stat;
+ h->mb.i_last_qp = bak->last_qp;
+ h->mb.i_last_dqp = bak->last_dqp;
+ h->mb.field_decoding_flag = bak->field_decoding_flag;
+ }
else
{
h->stat.frame.i_mv_bits = bak->stat.i_mv_bits;
int starting_bits = bs_pos(&h->out.bs);
int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
int b_hpel = h->fdec->b_kept_as_ref;
+ int orig_last_mb = h->sh.i_last_mb;
uint8_t *last_emu_check;
- x264_bs_bak_t bs_bak[1];
+ x264_bs_bak_t bs_bak[2];
b_deblock &= b_hpel || h->param.psz_dump_yuv;
bs_realign( &h->out.bs );
mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
- if( !(i_mb_y & SLICE_MBAFF) )
+ if( i_mb_x == 0 )
{
if( x264_bitstream_check_buffer( h ) )
return -1;
-
- if( back_up_bitstream )
- x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 );
+ if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )
+ x264_bitstream_backup( h, &bs_bak[1], i_skip, 1 );
+ if( !h->mb.b_reencode_mb )
+ x264_fdec_filter_row( h, i_mb_y, 1 );
}
- if( i_mb_x == 0 && !h->mb.b_reencode_mb )
- x264_fdec_filter_row( h, i_mb_y, 1 );
+ if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
+ x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 );
if( PARAM_INTERLACED )
{
break;
}
else
- {
h->sh.i_last_mb = mb_xy;
- h->mb.b_reencode_mb = 0;
- }
}
- else
- h->mb.b_reencode_mb = 0;
}
+ h->mb.b_reencode_mb = 0;
#if HAVE_VISUALIZE
if( h->param.b_visualize )
/* save cache */
x264_macroblock_cache_save( h );
+ if( x264_ratecontrol_mb( h, mb_size ) < 0 )
+ {
+ x264_bitstream_restore( h, &bs_bak[1], &i_skip, 1 );
+ h->mb.b_reencode_mb = 1;
+ i_mb_x = 0;
+ i_mb_y = i_mb_y - SLICE_MBAFF;
+ h->mb.i_mb_prev_xy = i_mb_y * h->mb.i_mb_stride - 1;
+ h->sh.i_last_mb = orig_last_mb;
+ continue;
+ }
+
/* accumulate mb stats */
h->stat.frame.i_mb_count[h->mb.i_type]++;
if( b_deblock )
x264_macroblock_deblock_strength( h );
- x264_ratecontrol_mb( h, mb_size );
-
if( mb_xy == h->sh.i_last_mb )
break;
int qp; /* qp for current frame */
float qpm; /* qp for current macroblock: precise float for AQ */
float qpa_rc; /* average of macroblocks' qp before aq */
+ float qpa_rc_prev;
int qpa_aq; /* average of macroblocks' qp after aq */
+ int qpa_aq_prev;
float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */
/* VBV stuff */
q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
- rc->qpa_rc =
- rc->qpa_aq = 0;
+ rc->qpa_rc = rc->qpa_rc_prev =
+ rc->qpa_aq = rc->qpa_aq_prev = 0;
rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX );
h->fdec->f_qp_avg_rc =
h->fdec->f_qp_avg_aq =
* eliminate all use of qp in row ratecontrol: make it entirely qscale-based.
* make this function stop being needlessly O(N^2)
* update more often than once per row? */
-void x264_ratecontrol_mb( x264_t *h, int bits )
+int x264_ratecontrol_mb( x264_t *h, int bits )
{
x264_ratecontrol_t *rc = h->rc;
const int y = h->mb.i_mb_y;
rc->qpa_aq += h->mb.i_qp;
if( h->mb.i_mb_x != h->mb.i_mb_width - 1 )
- return;
+ return 0;
x264_emms();
rc->qpa_rc += rc->qpm * h->mb.i_mb_width;
if( !rc->b_vbv )
- return;
+ return 0;
float qscale = qp2qscale( rc->qpm );
h->fdec->f_row_qp[y] = rc->qpm;
/* update ratecontrol per-mbpair in MBAFF */
if( SLICE_MBAFF && !(y&1) )
- return;
+ return 0;
+
+ /* FIXME: We don't currently support the case where there's a slice
+ * boundary in between. */
+ int can_reencode_row = h->sh.i_first_mb <= ((h->mb.i_mb_y - SLICE_MBAFF) * h->mb.i_mb_stride);
/* tweak quality based on difference from predicted size */
+ float prev_row_qp = h->fdec->f_row_qp[y];
+ float qp_absolute_max = h->param.rc.i_qp_max;
+ if( rc->rate_factor_max_increment )
+ qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
+ float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
+ float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
+ float step_size = 0.5f;
+ float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
+ float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
+ float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
+ float size_of_other_slices = 0;
+ if( h->param.b_sliced_threads )
+ {
+ float size_of_other_slices_planned = 0;
+ for( int i = 0; i < h->param.i_threads; i++ )
+ if( h != h->thread[i] )
+ {
+ size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
+ size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
+ }
+ float weight = rc->slice_size_planned / rc->frame_size_planned;
+ size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
+ }
if( y < h->i_threadslice_end-1 )
{
- float prev_row_qp = h->fdec->f_row_qp[y];
- float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
- float qp_absolute_max = h->param.rc.i_qp_max;
- if( rc->rate_factor_max_increment )
- qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
- float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
- float step_size = 0.5f;
-
/* B-frames shouldn't use lower QP than their reference frames. */
if( h->sh.i_type == SLICE_TYPE_B )
{
rc->qpm = X264_MAX( rc->qpm, qp_min );
}
- float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
- float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
- float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
- float size_of_other_slices = 0;
- if( h->param.b_sliced_threads )
- {
- float size_of_other_slices_planned = 0;
- for( int i = 0; i < h->param.i_threads; i++ )
- if( h != h->thread[i] )
- {
- size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
- size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
- }
- float weight = rc->slice_size_planned / rc->frame_size_planned;
- size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
- }
-
/* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
+ /* Don't increase the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
/* area at the top of the frame was measured inaccurately. */
if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned )
- return;
+ qp_max = qp_absolute_max = prev_row_qp;
if( h->sh.i_type != SLICE_TYPE_I )
rc_tol *= 0.5f;
}
h->rc->frame_size_estimated = b1 - size_of_other_slices;
+
+ /* If the current row was large enough to cause a large QP jump, try re-encoding it. */
+ if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row )
+ {
+ /* Bump QP to halfway in between... close enough. */
+ rc->qpm = x264_clip3f( (prev_row_qp + rc->qpm)*0.5f, prev_row_qp + 1.0f, qp_max );
+ rc->qpa_rc = rc->qpa_rc_prev;
+ rc->qpa_aq = rc->qpa_aq_prev;
+ h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
+ return -1;
+ }
}
else
+ {
h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
+
+ /* Last-ditch attempt: if the last row of the frame underflowed the VBV,
+ * try again. */
+ if( (h->rc->frame_size_estimated + size_of_other_slices) > (rc->buffer_fill - rc->buffer_rate * max_frame_error) &&
+ rc->qpm < qp_max && can_reencode_row )
+ {
+ rc->qpm = qp_max;
+ rc->qpa_rc = rc->qpa_rc_prev;
+ rc->qpa_aq = rc->qpa_aq_prev;
+ h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
+ return -1;
+ }
+ }
+
+ rc->qpa_rc_prev = rc->qpa_rc;
+ rc->qpa_aq_prev = rc->qpa_aq;
+
+ return 0;
}
int x264_ratecontrol_qp( x264_t *h )
void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead );
int x264_ratecontrol_slice_type( x264_t *, int i_frame );
void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm );
-void x264_ratecontrol_mb( x264_t *, int bits );
+int x264_ratecontrol_mb( x264_t *, int bits );
int x264_ratecontrol_qp( x264_t * );
int x264_ratecontrol_mb_qp( x264_t *h );
int x264_ratecontrol_end( x264_t *, int bits, int *filler );