From: Fiona Glaser Date: Thu, 9 Feb 2012 22:23:52 +0000 (-0800) Subject: Add row-reencoding support to VBV for improved accuracy X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2535ba17b2598f4155955857c12d52a377a75517;p=libx264 Add row-reencoding support to VBV for improved accuracy Extremely accurate, possibly 100% so (I can't get it to fail even with difficult VBVs). Does not yet support rows split on slice boundaries (occurs often with slice-max-size/mbs). Still inaccurate with sliced threads, but better than before. --- diff --git a/common/macroblock.c b/common/macroblock.c index b4e6d951..6bb0566e 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -360,13 +360,11 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead ) { if( !b_lookahead ) { - for( int i = 0; i <= 4*PARAM_INTERLACED; i++ ) + for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ ) for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ ) { CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) ); h->intra_border_backup[i][j] += 16; - if( !PARAM_INTERLACED ) - h->intra_border_backup[1][j] = h->intra_border_backup[i][j]; } for( int i = 0; i <= PARAM_INTERLACED; i++ ) { @@ -404,7 +402,7 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead ) { for( int i = 0; i <= PARAM_INTERLACED; i++ ) x264_free( h->deblock_strength[i] ); - for( int i = 0; i <= 4*PARAM_INTERLACED; i++ ) + for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ ) for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ ) x264_free( h->intra_border_backup[i][j] - 16 ); } @@ -563,7 +561,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x ? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride : 16 * mb_x + height * mb_y * i_stride; pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset]; - int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0; + int fdec_idx = b_mbaff ? (mb_interlaced ? 
(3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : !(mb_y&1); pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16]; int ref_pix_offset[2] = { i_pix_offset, i_pix_offset }; /* ref_pix_offset[0] references the current field and [1] the opposite field. */ @@ -576,20 +574,16 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height ); memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) ); memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) ); - if( b_mbaff ) - { - h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8]; - h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1]; - } + h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8]; + h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1]; } else { h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 ); memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) ); - if( b_mbaff ) - h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1]; + h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1]; } - if( b_mbaff ) + if( b_mbaff || h->mb.b_reencode_mb ) { for( int j = 0; j < height; j++ ) if( b_chroma ) @@ -1638,7 +1632,7 @@ static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int * For progressive mbs this is the bottom two rows, and for interlaced the * bottom row of each field. We also store samples needed for the next * mbpair in intra_border_backup[2]. */ - int backup_dst = !b_mbaff ? 0 : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2; + int backup_dst = !b_mbaff ? (mb_y&1) : (mb_y&1) ? 1 : MB_INTERLACED ? 
0 : 2; memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) ); if( CHROMA444 ) { @@ -1672,14 +1666,6 @@ static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int } } } - else - { - /* In progressive we update intra_border_backup in-place, so the topleft neighbor will - * no longer exist there when load_pic_pointers wants it. Move it within p_fdec instead. */ - h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[0][-FDEC_STRIDE+15]; - h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)]; - h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)]; - } } void x264_macroblock_cache_save( x264_t *h ) diff --git a/encoder/encoder.c b/encoder/encoder.c index b420544a..93842ef9 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -334,17 +334,17 @@ static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal } /* If we are within a reasonable distance of the end of the memory allocated for the bitstream, */ -/* reallocate, adding an arbitrary amount of space (100 kilobytes). */ +/* reallocate, adding an arbitrary amount of space. 
*/ static int x264_bitstream_check_buffer( x264_t *h ) { uint8_t *bs_bak = h->out.p_bitstream; - int max_mb_size = 2500 << SLICE_MBAFF; - if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_mb_size)) || - (h->out.bs.p_end - h->out.bs.p < max_mb_size) ) + int max_row_size = (2500 << SLICE_MBAFF) * h->mb.i_mb_width; + if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_row_size)) || + (h->out.bs.p_end - h->out.bs.p < max_row_size) ) { - h->out.i_bitstream += 100000; + h->out.i_bitstream += max_row_size; CHECKED_MALLOC( h->out.p_bitstream, h->out.i_bitstream ); - h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - 100000) & ~15 ); + h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - max_row_size) & ~15 ); intptr_t delta = h->out.p_bitstream - bs_bak; h->out.bs.p_start += delta; @@ -580,7 +580,7 @@ static int x264_validate_parameters( x264_t *h, int b_open ) } h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX ); h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max ); - h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 0, QP_MAX ); + h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 2, QP_MAX ); h->param.rc.i_bitrate = x264_clip3( h->param.rc.i_bitrate, 0, 2000000 ); h->param.rc.i_vbv_buffer_size = x264_clip3( h->param.rc.i_vbv_buffer_size, 0, 2000000 ); h->param.rc.i_vbv_max_bitrate = x264_clip3( h->param.rc.i_vbv_max_bitrate, 0, 2000000 ); @@ -2059,12 +2059,20 @@ typedef struct bs_t bs; x264_cabac_t cabac; x264_frame_stat_t stat; + int last_qp; + int last_dqp; + int field_decoding_flag; } x264_bs_bak_t; static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full ) { if( full ) + { bak->stat = h->stat.frame; + bak->last_qp = h->mb.i_last_qp; + bak->last_dqp = h->mb.i_last_dqp; + bak->field_decoding_flag = h->mb.field_decoding_flag; + } else { bak->stat.i_mv_bits = h->stat.frame.i_mv_bits; @@ -2093,7 +2101,12 @@ 
static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak, static ALWAYS_INLINE void x264_bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full ) { if( full ) + { h->stat.frame = bak->stat; + h->mb.i_last_qp = bak->last_qp; + h->mb.i_last_dqp = bak->last_dqp; + h->mb.field_decoding_flag = bak->field_decoding_flag; + } else { h->stat.frame.i_mv_bits = bak->stat.i_mv_bits; @@ -2128,8 +2141,9 @@ static int x264_slice_write( x264_t *h ) int starting_bits = bs_pos(&h->out.bs); int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1; int b_hpel = h->fdec->b_kept_as_ref; + int orig_last_mb = h->sh.i_last_mb; uint8_t *last_emu_check; - x264_bs_bak_t bs_bak[1]; + x264_bs_bak_t bs_bak[2]; b_deblock &= b_hpel || h->param.psz_dump_yuv; bs_realign( &h->out.bs ); @@ -2175,17 +2189,18 @@ static int x264_slice_write( x264_t *h ) mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width; int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac); - if( !(i_mb_y & SLICE_MBAFF) ) + if( i_mb_x == 0 ) { if( x264_bitstream_check_buffer( h ) ) return -1; - - if( back_up_bitstream ) - x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 ); + if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size ) + x264_bitstream_backup( h, &bs_bak[1], i_skip, 1 ); + if( !h->mb.b_reencode_mb ) + x264_fdec_filter_row( h, i_mb_y, 1 ); } - if( i_mb_x == 0 && !h->mb.b_reencode_mb ) - x264_fdec_filter_row( h, i_mb_y, 1 ); + if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream ) + x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 ); if( PARAM_INTERLACED ) { @@ -2290,14 +2305,10 @@ reencode: break; } else - { h->sh.i_last_mb = mb_xy; - h->mb.b_reencode_mb = 0; - } } - else - h->mb.b_reencode_mb = 0; } + h->mb.b_reencode_mb = 0; #if HAVE_VISUALIZE if( h->param.b_visualize ) @@ -2307,6 +2318,17 @@ reencode: /* save cache */ x264_macroblock_cache_save( h ); + if( x264_ratecontrol_mb( h, mb_size ) < 0 ) + { + x264_bitstream_restore( h, &bs_bak[1], &i_skip, 1 ); + h->mb.b_reencode_mb = 1; + 
i_mb_x = 0; + i_mb_y = i_mb_y - SLICE_MBAFF; + h->mb.i_mb_prev_xy = i_mb_y * h->mb.i_mb_stride - 1; + h->sh.i_last_mb = orig_last_mb; + continue; + } + /* accumulate mb stats */ h->stat.frame.i_mb_count[h->mb.i_type]++; @@ -2381,8 +2403,6 @@ reencode: if( b_deblock ) x264_macroblock_deblock_strength( h ); - x264_ratecontrol_mb( h, mb_size ); - if( mb_xy == h->sh.i_last_mb ) break; diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 218f3f9c..ee2911ba 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -87,7 +87,9 @@ struct x264_ratecontrol_t int qp; /* qp for current frame */ float qpm; /* qp for current macroblock: precise float for AQ */ float qpa_rc; /* average of macroblocks' qp before aq */ + float qpa_rc_prev; int qpa_aq; /* average of macroblocks' qp after aq */ + int qpa_aq_prev; float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */ /* VBV stuff */ @@ -1335,8 +1337,8 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); - rc->qpa_rc = - rc->qpa_aq = 0; + rc->qpa_rc = rc->qpa_rc_prev = + rc->qpa_aq = rc->qpa_aq_prev = 0; rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX ); h->fdec->f_qp_avg_rc = h->fdec->f_qp_avg_aq = @@ -1400,7 +1402,7 @@ static float predict_row_size_sum( x264_t *h, int y, float qp ) * eliminate all use of qp in row ratecontrol: make it entirely qscale-based. * make this function stop being needlessly O(N^2) * update more often than once per row? 
*/ -void x264_ratecontrol_mb( x264_t *h, int bits ) +int x264_ratecontrol_mb( x264_t *h, int bits ) { x264_ratecontrol_t *rc = h->rc; const int y = h->mb.i_mb_y; @@ -1409,13 +1411,13 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) rc->qpa_aq += h->mb.i_qp; if( h->mb.i_mb_x != h->mb.i_mb_width - 1 ) - return; + return 0; x264_emms(); rc->qpa_rc += rc->qpm * h->mb.i_mb_width; if( !rc->b_vbv ) - return; + return 0; float qscale = qp2qscale( rc->qpm ); h->fdec->f_row_qp[y] = rc->qpm; @@ -1427,19 +1429,38 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) /* update ratecontrol per-mbpair in MBAFF */ if( SLICE_MBAFF && !(y&1) ) - return; + return 0; + + /* FIXME: We don't currently support the case where there's a slice + * boundary in between. */ + int can_reencode_row = h->sh.i_first_mb <= ((h->mb.i_mb_y - SLICE_MBAFF) * h->mb.i_mb_stride); /* tweak quality based on difference from predicted size */ + float prev_row_qp = h->fdec->f_row_qp[y]; + float qp_absolute_max = h->param.rc.i_qp_max; + if( rc->rate_factor_max_increment ) + qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment ); + float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max ); + float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min ); + float step_size = 0.5f; + float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned; + float slice_size_planned = h->param.b_sliced_threads ? 
rc->slice_size_planned : rc->frame_size_planned; + float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height ); + float size_of_other_slices = 0; + if( h->param.b_sliced_threads ) + { + float size_of_other_slices_planned = 0; + for( int i = 0; i < h->param.i_threads; i++ ) + if( h != h->thread[i] ) + { + size_of_other_slices += h->thread[i]->rc->frame_size_estimated; + size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned; + } + float weight = rc->slice_size_planned / rc->frame_size_planned; + size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned; + } if( y < h->i_threadslice_end-1 ) { - float prev_row_qp = h->fdec->f_row_qp[y]; - float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min ); - float qp_absolute_max = h->param.rc.i_qp_max; - if( rc->rate_factor_max_increment ) - qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment ); - float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max ); - float step_size = 0.5f; - /* B-frames shouldn't use lower QP than their reference frames. */ if( h->sh.i_type == SLICE_TYPE_B ) { @@ -1447,31 +1468,14 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) rc->qpm = X264_MAX( rc->qpm, qp_min ); } - float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned; - float slice_size_planned = h->param.b_sliced_threads ? 
rc->slice_size_planned : rc->frame_size_planned; - float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height ); - float size_of_other_slices = 0; - if( h->param.b_sliced_threads ) - { - float size_of_other_slices_planned = 0; - for( int i = 0; i < h->param.i_threads; i++ ) - if( h != h->thread[i] ) - { - size_of_other_slices += h->thread[i]->rc->frame_size_estimated; - size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned; - } - float weight = rc->slice_size_planned / rc->frame_size_planned; - size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned; - } - /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */ float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance; float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices; - /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */ + /* Don't increase the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */ /* area at the top of the frame was measured inaccurately. */ if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned ) - return; + qp_max = qp_absolute_max = prev_row_qp; if( h->sh.i_type != SLICE_TYPE_I ) rc_tol *= 0.5f; @@ -1507,9 +1511,39 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) } h->rc->frame_size_estimated = b1 - size_of_other_slices; + + /* If the current row was large enough to cause a large QP jump, try re-encoding it. */ + if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row ) + { + /* Bump QP to halfway in between... close enough.
*/ + rc->qpm = x264_clip3f( (prev_row_qp + rc->qpm)*0.5f, prev_row_qp + 1.0f, qp_max ); + rc->qpa_rc = rc->qpa_rc_prev; + rc->qpa_aq = rc->qpa_aq_prev; + h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0; + return -1; + } } else + { h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm ); + + /* Last-ditch attempt: if the last row of the frame underflowed the VBV, + * try again. */ + if( (h->rc->frame_size_estimated + size_of_other_slices) > (rc->buffer_fill - rc->buffer_rate * max_frame_error) && + rc->qpm < qp_max && can_reencode_row ) + { + rc->qpm = qp_max; + rc->qpa_rc = rc->qpa_rc_prev; + rc->qpa_aq = rc->qpa_aq_prev; + h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0; + return -1; + } + } + + rc->qpa_rc_prev = rc->qpa_rc; + rc->qpa_aq_prev = rc->qpa_aq; + + return 0; } int x264_ratecontrol_qp( x264_t *h ) diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h index a10bf3e8..19d4197f 100644 --- a/encoder/ratecontrol.h +++ b/encoder/ratecontrol.h @@ -51,7 +51,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ); void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead ); int x264_ratecontrol_slice_type( x264_t *, int i_frame ); void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm ); -void x264_ratecontrol_mb( x264_t *, int bits ); +int x264_ratecontrol_mb( x264_t *, int bits ); int x264_ratecontrol_qp( x264_t * ); int x264_ratecontrol_mb_qp( x264_t *h ); int x264_ratecontrol_end( x264_t *, int bits, int *filler );