From 4ccbb1998c81c5533c17da91aa67b62a5d9857c8 Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Mon, 31 Aug 2009 06:36:41 -0700 Subject: [PATCH] Multi-slice encoding support Slicing support is available through three methods (which can be mixed): --slices sets a number of slices per frame and ensures rectangular slices (required for Blu-ray). Overridden by either of the following options: --slice-max-mbs sets a maximum number of macroblocks per slice. --slice-max-size sets a maximum slice size, in bytes (includes NAL overhead). Implement macroblock re-encoding support to allow highly accurate slice size limitation. Might be useful for other things in the future, too. --- Makefile | 6 +- common/common.c | 21 +++- common/common.h | 14 +-- common/macroblock.c | 4 +- encoder/analyse.c | 46 +++++---- encoder/encoder.c | 232 +++++++++++++++++++++++++++++++----------- encoder/ratecontrol.c | 14 ++- encoder/slicetype.c | 1 - x264.c | 21 ++-- x264.h | 7 +- 10 files changed, 261 insertions(+), 105 deletions(-) diff --git a/Makefile b/Makefile index 2243775d..0f34736e 100644 --- a/Makefile +++ b/Makefile @@ -119,10 +119,10 @@ endif SRC2 = $(SRCS) $(SRCCLI) # These should cover most of the important codepaths OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb -OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0 -OPT2 = --crf 26 -b4 -m5 -r2 --me hex --cqm jvt --nr 100 --psnr --no-mixed-refs --b-adapt 2 +OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0 --slice-max-mbs 50 +OPT2 = --crf 26 -b4 -m5 -r2 --me hex --cqm jvt --nr 100 --psnr --no-mixed-refs --b-adapt 2 --slice-max-size 1500 OPT3 = --crf 18 -b3 -m9 -r5 --me umh -t1 -A all --b-pyramid --direct auto --no-fast-pskip --no-mbtree -OPT4 = --crf 22 -b3 -m7 -r4 --me esa -t2 -A all --psy-rd 1.0:1.0 +OPT4 = --crf 22 -b3 -m7 -r4 --me esa -t2 -A all --psy-rd 1.0:1.0 --slices 4 OPT5 = --frames 50 --crf 24 -b3 -m10 -r3 --me 
tesa -t2 OPT6 = --frames 50 -q0 -m9 -r2 --me hex -Aall OPT7 = --frames 50 -q0 -m2 -r1 --me hex --no-cabac diff --git a/common/common.c b/common/common.c index 1f010306..c0a56e3d 100644 --- a/common/common.c +++ b/common/common.c @@ -62,6 +62,9 @@ void x264_param_default( x264_param_t *param ) param->i_fps_num = 25; param->i_fps_den = 1; param->i_level_idc = -1; + param->i_slice_max_size = 0; + param->i_slice_max_mbs = 0; + param->i_slice_count = 0; /* Encoder parameters */ param->i_frame_reference = 3; @@ -370,6 +373,12 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) else p->b_deblocking_filter = atobool(value); } + OPT("slice-max-size") + p->i_slice_max_size = atoi(value); + OPT("slice-max-mbs") + p->i_slice_max_mbs = atoi(value); + OPT("slices") + p->i_slice_count = atoi(value); OPT("cabac") p->b_cabac = atobool(value); OPT("cabac-idc") @@ -773,9 +782,9 @@ void x264_reduce_fraction( int *n, int *d ) c = a % b; while(c) { - a = b; - b = c; - c = a % b; + a = b; + b = c; + c = a % b; } *n /= b; *d /= b; @@ -851,6 +860,12 @@ char *x264_param2string( x264_param_t *p, int b_res ) s += sprintf( s, " deadzone=%d,%d", p->analyse.i_luma_deadzone[0], p->analyse.i_luma_deadzone[1] ); s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset ); s += sprintf( s, " threads=%d", p->i_threads ); + if( p->i_slice_count ) + s += sprintf( s, " slices=%d", p->i_slice_count ); + if( p->i_slice_max_size ) + s += sprintf( s, " slice_max_size=%d", p->i_slice_max_size ); + if( p->i_slice_max_mbs ) + s += sprintf( s, " slice_max_mbs=%d", p->i_slice_max_mbs ); s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction ); s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate ); s += sprintf( s, " mbaff=%d", p->b_interlaced ); diff --git a/common/common.h b/common/common.h index fa0f9c83..574040d0 100644 --- a/common/common.h +++ b/common/common.h @@ -50,8 +50,6 @@ do {\ #define X264_BFRAME_MAX 16 #define X264_THREAD_MAX 128 -#define 
X264_SLICE_MAX 4 -#define X264_NAL_MAX (4 + X264_SLICE_MAX) #define X264_PCM_COST (386*8) #define X264_LOOKAHEAD_MAX 250 @@ -68,6 +66,7 @@ do {\ #include #include #include +#include #include "x264.h" #include "bs.h" #include "set.h" @@ -293,7 +292,8 @@ struct x264_t struct { int i_nal; - x264_nal_t nal[X264_NAL_MAX]; + int i_nals_allocated; + x264_nal_t *nal; int i_bitstream; /* size of p_bitstream */ uint8_t *p_bitstream; /* will hold data for all nal */ bs_t bs; @@ -488,6 +488,8 @@ struct x264_t /* skip flag for motion compensation */ /* if we've already done MC, we don't need to do it again */ int b_skip_mc; + /* set to true if we are re-encoding a macroblock. */ + int b_reencode_mb; struct { @@ -623,9 +625,9 @@ struct x264_t /* Cumulated stats */ /* per slice info */ - int i_slice_count[5]; - int64_t i_slice_size[5]; - double f_slice_qp[5]; + int i_frame_count[5]; + int64_t i_frame_size[5]; + double f_frame_qp[5]; int i_consecutive_bframes[X264_BFRAME_MAX+1]; /* */ int64_t i_ssd_global[5]; diff --git a/common/macroblock.c b/common/macroblock.c index f8b20c79..790dde22 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -850,7 +850,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, w ); memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 ); - if( h->mb.b_interlaced ) + if( h->mb.b_interlaced || h->mb.b_reencode_mb ) { const uint8_t *plane_fdec = &h->fdec->plane[i][i_pix_offset]; for( j = 0; j < w; j++ ) @@ -1016,7 +1016,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) + !!(h->mb.i_neighbour & MB_TOP); } - if( !h->mb.b_interlaced ) + if( !h->mb.b_interlaced && !h->mb.b_reencode_mb ) { copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE ); copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE ); diff 
--git a/encoder/analyse.c b/encoder/analyse.c index 7661031d..deca43b0 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -24,7 +24,6 @@ #define _ISOC99_SOURCE #include -#include #ifndef _MSC_VER #include #endif @@ -457,23 +456,30 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) /* Max = 4 */ static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count ) { - if( i_neighbour & MB_TOPLEFT ) + int b_top = i_neighbour & MB_TOP; + int b_left = i_neighbour & MB_LEFT; + if( b_top && b_left ) { /* top and left available */ *mode++ = I_PRED_16x16_V; *mode++ = I_PRED_16x16_H; *mode++ = I_PRED_16x16_DC; - *mode++ = I_PRED_16x16_P; - *pi_count = 4; + *pi_count = 3; + if( i_neighbour & MB_TOPLEFT ) + { + /* top left available*/ + *mode++ = I_PRED_16x16_P; + *pi_count = 4; + } } - else if( i_neighbour & MB_LEFT ) + else if( b_left ) { /* left available*/ *mode++ = I_PRED_16x16_DC_LEFT; *mode++ = I_PRED_16x16_H; *pi_count = 2; } - else if( i_neighbour & MB_TOP ) + else if( b_top ) { /* top available*/ *mode++ = I_PRED_16x16_DC_TOP; @@ -491,23 +497,30 @@ static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, i /* Max = 4 */ static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mode, int *pi_count ) { - if( i_neighbour & MB_TOPLEFT ) + int b_top = i_neighbour & MB_TOP; + int b_left = i_neighbour & MB_LEFT; + if( b_top && b_left ) { /* top and left available */ *mode++ = I_PRED_CHROMA_V; *mode++ = I_PRED_CHROMA_H; *mode++ = I_PRED_CHROMA_DC; - *mode++ = I_PRED_CHROMA_P; - *pi_count = 4; + *pi_count = 3; + if( i_neighbour & MB_TOPLEFT ) + { + /* top left available */ + *mode++ = I_PRED_CHROMA_P; + *pi_count = 4; + } } - else if( i_neighbour & MB_LEFT ) + else if( b_left ) { /* left available*/ *mode++ = I_PRED_CHROMA_DC_LEFT; *mode++ = I_PRED_CHROMA_H; *pi_count = 2; } - else if( i_neighbour & MB_TOP ) + else if( b_top ) { /* top available*/ *mode++ = 
I_PRED_CHROMA_DC_TOP; @@ -526,10 +539,9 @@ static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mod static void predict_4x4_mode_available( unsigned int i_neighbour, int *mode, int *pi_count ) { - int b_l = i_neighbour & MB_LEFT; - int b_t = i_neighbour & MB_TOP; - - if( b_l && b_t ) + int b_top = i_neighbour & MB_TOP; + int b_left = i_neighbour & MB_LEFT; + if( b_top && b_left ) { *pi_count = 6; *mode++ = I_PRED_4x4_DC; @@ -546,14 +558,14 @@ static void predict_4x4_mode_available( unsigned int i_neighbour, *mode++ = I_PRED_4x4_VL; *mode++ = I_PRED_4x4_HU; } - else if( b_l ) + else if( b_left ) { *mode++ = I_PRED_4x4_DC_LEFT; *mode++ = I_PRED_4x4_H; *mode++ = I_PRED_4x4_HU; *pi_count = 3; } - else if( b_t ) + else if( b_top ) { *mode++ = I_PRED_4x4_DC_TOP; *mode++ = I_PRED_4x4_V; diff --git a/encoder/encoder.c b/encoder/encoder.c index 841bfe9d..8cec1552 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -88,7 +88,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh, sh->pps = pps; sh->i_first_mb = 0; - sh->i_last_mb = h->sps->i_mb_width * h->sps->i_mb_height; + sh->i_last_mb = h->mb.i_mb_count - 1; sh->i_pps_id = pps->i_id; sh->i_frame_num = i_frame; @@ -180,7 +180,7 @@ static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal if( !sh->sps->b_frame_mbs_only ) { bs_write1( s, sh->b_field_pic ); - if ( sh->b_field_pic ) + if( sh->b_field_pic ) bs_write1( s, sh->b_bottom_field ); } @@ -466,6 +466,23 @@ static int x264_validate_parameters( x264_t *h ) h->param.i_width, h->param.i_height ); } + int max_slices = (h->param.i_height+((16<<h->param.b_interlaced)-1))/(16<<h->param.b_interlaced); + h->param.i_slice_count = x264_clip3( h->param.i_slice_count, 0, max_slices ); + h->param.i_slice_max_size = X264_MAX( h->param.i_slice_max_size, 0 ); + h->param.i_slice_max_mbs = X264_MAX( h->param.i_slice_max_mbs, 0 ); + if( h->param.b_interlaced && h->param.i_slice_max_size ) + { + x264_log( h, 
X264_LOG_WARNING, "interlaced + slice-max-size is not implemented\n" ); + h->param.i_slice_max_size = 0; + } + if( h->param.b_interlaced && h->param.i_slice_max_mbs ) + { + x264_log( h, X264_LOG_WARNING, "interlaced + slice-max-mbs is not implemented\n" ); + h->param.i_slice_max_mbs = 0; + } + if( h->param.i_slice_max_mbs || h->param.i_slice_max_size ) + h->param.i_slice_count = 0; + h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 ); if( h->param.i_keyint_max <= 0 ) h->param.i_keyint_max = 1; @@ -855,6 +872,9 @@ x264_t *x264_encoder_open ( x264_param_t *param ) if( !h->thread[i]->fdec ) goto fail; CHECKED_MALLOC( h->thread[i]->out.p_bitstream, h->out.i_bitstream ); + /* Start each thread with room for 8 NAL units; it'll realloc later if needed. */ + CHECKED_MALLOC( h->thread[i]->out.nal, 8*sizeof(x264_nal_t) ); + h->thread[i]->out.i_nals_allocated = 8; if( x264_macroblock_cache_init( h->thread[i] ) < 0 ) goto fail; } @@ -931,6 +951,9 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param ) COPY( analyse.b_transform_8x8 ); if( h->frames.i_max_ref1 > 1 ) COPY( b_bframe_pyramid ); + COPY( i_slice_max_size ); + COPY( i_slice_max_mbs ); + COPY( i_slice_count ); #undef COPY mbcmp_init( h ); @@ -949,11 +972,24 @@ static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc ) nal->i_payload= 0; nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8]; } -static void x264_nal_end( x264_t *h ) +static int x264_nal_end( x264_t *h ) { x264_nal_t *nal = &h->out.nal[h->out.i_nal]; nal->i_payload = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8] - nal->p_payload; h->out.i_nal++; + + /* if number of allocated nals is not enough, re-allocate a larger one. 
*/ + if( h->out.i_nal >= h->out.i_nals_allocated ) + { + x264_nal_t *new_out = x264_malloc( sizeof(x264_nal_t) * (h->out.i_nals_allocated*2) ); + if( !new_out ) + return -1; + memcpy( new_out, h->out.nal, sizeof(x264_nal_t) * (h->out.i_nals_allocated) ); + x264_free( h->out.nal ); + h->out.nal = new_out; + h->out.i_nals_allocated *= 2; + } + return 0; } /**************************************************************************** @@ -972,17 +1008,20 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal ) x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); if( x264_sei_version_write( h, &h->out.bs ) ) return -1; - x264_nal_end( h ); + if( x264_nal_end( h ) ) + return -1; /* generate sequence parameters */ x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); x264_sps_write( &h->out.bs, h->sps ); - x264_nal_end( h ); + if( x264_nal_end( h ) ) + return -1; /* generate picture parameters */ x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); x264_pps_write( &h->out.bs, h->pps ); - x264_nal_end( h ); + if( x264_nal_end( h ) ) + return -1; bs_flush( &h->out.bs ); } /* now set output*/ @@ -1231,10 +1270,13 @@ static int x264_slice_write( x264_t *h ) { int i_skip; int mb_xy, i_mb_x, i_mb_y; - int i, i_list, i_ref; - - /* init stats */ - memset( &h->stat.frame, 0, sizeof(h->stat.frame) ); + int i, i_list, i_ref, i_skip_bak = 0; /* Shut up GCC. */ + bs_t bs_bak; + x264_cabac_t cabac_bak; + uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */ + /* Assume no more than 3 bytes of NALU escaping. */ + int slice_max_size = h->param.i_slice_max_size > 0 ? 
(h->param.i_slice_max_size-3-NALU_OVERHEAD)*8 : INT_MAX; + int starting_bits = bs_pos(&h->out.bs); /* Slice */ x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc ); @@ -1257,11 +1299,29 @@ static int x264_slice_write( x264_t *h ) i_mb_x = h->sh.i_first_mb % h->sps->i_mb_width; i_skip = 0; - while( (mb_xy = i_mb_x + i_mb_y * h->sps->i_mb_width) < h->sh.i_last_mb ) + while( (mb_xy = i_mb_x + i_mb_y * h->sps->i_mb_width) <= h->sh.i_last_mb ) { int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac); + if( h->param.i_slice_max_size > 0 ) + { + /* We don't need the contexts because flushing the CABAC encoder has no context + * dependency and macroblocks are only re-encoded in the case where a slice is + * ended (and thus the content of all contexts are thrown away). */ + if( h->param.b_cabac ) + { + memcpy( &cabac_bak, &h->cabac, offsetof(x264_cabac_t, f8_bits_encoded) ); + /* x264's CABAC writer modifies the previous byte during carry, so it has to be + * backed up. */ + cabac_prevbyte_bak = h->cabac.p[-1]; + } + else + { + bs_bak = h->out.bs; + i_skip_bak = i_skip; + } + } - if( i_mb_x == 0 ) + if( i_mb_x == 0 && !h->mb.b_reencode_mb ) x264_fdec_filter_row( h, i_mb_y ); /* load cache */ @@ -1309,6 +1369,37 @@ static int x264_slice_write( x264_t *h ) } } + int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac); + int mb_size = total_bits - mb_spos; + + /* We'll just re-encode this last macroblock if we go over the max slice size. 
*/ + if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb ) + { + if( mb_xy != h->sh.i_first_mb ) + { + if( h->param.b_cabac ) + { + memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) ); + h->cabac.p[-1] = cabac_prevbyte_bak; + } + else + { + h->out.bs = bs_bak; + i_skip = i_skip_bak; + } + h->mb.b_reencode_mb = 1; + h->sh.i_last_mb = mb_xy-1; + break; + } + else + { + h->sh.i_last_mb = mb_xy; + h->mb.b_reencode_mb = 0; + } + } + else + h->mb.b_reencode_mb = 0; + #if VISUALIZE if( h->param.b_visualize ) x264_visualize_mb( h ); @@ -1350,7 +1441,7 @@ static int x264_slice_write( x264_t *h ) h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8; } - x264_ratecontrol_mb( h, bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) - mb_spos ); + x264_ratecontrol_mb( h, mb_size ); if( h->sh.b_mbaff ) { @@ -1359,7 +1450,7 @@ static int x264_slice_write( x264_t *h ) } else i_mb_x++; - if(i_mb_x == h->sps->i_mb_width) + if( i_mb_x == h->sps->i_mb_width ) { i_mb_y++; i_mb_x = 0; @@ -1379,16 +1470,18 @@ static int x264_slice_write( x264_t *h ) bs_rbsp_trailing( &h->out.bs ); bs_flush( &h->out.bs ); } + if( x264_nal_end( h ) ) + return -1; - x264_nal_end( h ); - - x264_fdec_filter_row( h, h->sps->i_mb_height ); + if( h->sh.i_last_mb == h->mb.i_mb_count-1 ) + { + h->stat.frame.i_misc_bits = bs_pos( &h->out.bs ) + + (h->out.i_nal*NALU_OVERHEAD * 8) + - h->stat.frame.i_tex_bits + - h->stat.frame.i_mv_bits; + x264_fdec_filter_row( h, h->sps->i_mb_height ); + } - /* Compute misc bits */ - h->stat.frame.i_misc_bits = bs_pos( &h->out.bs ) - + NALU_OVERHEAD * 8 - - h->stat.frame.i_tex_bits - - h->stat.frame.i_mv_bits; return 0; } @@ -1416,12 +1509,13 @@ static void x264_thread_sync_stat( x264_t *dst, x264_t *src ) { if( dst == src ) return; - memcpy( &dst->stat.i_slice_count, &src->stat.i_slice_count, sizeof(dst->stat) - sizeof(dst->stat.frame) ); + memcpy( &dst->stat.i_frame_count, &src->stat.i_frame_count, sizeof(dst->stat) - 
sizeof(dst->stat.frame) ); } static void *x264_slices_write( x264_t *h ) { - int i_frame_size; + int i_frame_size = 0; + int i_slice_num = 0; #ifdef HAVE_MMX /* Misalign mask has to be set separately for each thread. */ @@ -1435,9 +1529,28 @@ static void *x264_slices_write( x264_t *h ) return (void *)-1; #endif - if( x264_stack_align( x264_slice_write, h ) ) - return (void *)-1; - i_frame_size = h->out.nal[h->out.i_nal-1].i_payload; + /* init stats */ + memset( &h->stat.frame, 0, sizeof(h->stat.frame) ); + h->mb.b_reencode_mb = 0; + while( h->sh.i_first_mb < h->mb.i_mb_count ) + { + h->sh.i_last_mb = h->mb.i_mb_count - 1; + if( h->param.i_slice_max_mbs ) + h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1; + else if( h->param.i_slice_count ) + { + x264_emms(); + i_slice_num++; + double height = h->sps->i_mb_height >> h->param.b_interlaced; + int width = h->sps->i_mb_width << h->param.b_interlaced; + h->sh.i_last_mb = (int)(height * i_slice_num / h->param.i_slice_count + 0.5) * width - 1; + } + h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, h->mb.i_mb_count - 1 ); + if( x264_stack_align( x264_slice_write, h ) ) + return (void *)-1; + h->sh.i_first_mb = h->sh.i_last_mb + 1; + i_frame_size += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; + } #if VISUALIZE if( h->param.b_visualize ) @@ -1678,7 +1791,8 @@ int x264_encoder_encode( x264_t *h, x264_nal_start(h, NAL_AUD, NAL_PRIORITY_DISPOSABLE); bs_write(&h->out.bs, 3, pic_type); bs_rbsp_trailing(&h->out.bs); - x264_nal_end(h); + if( x264_nal_end( h ) ) + return -1; } h->i_nal_type = i_nal_type; @@ -1693,18 +1807,21 @@ int x264_encoder_encode( x264_t *h, x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); if( x264_sei_version_write( h, &h->out.bs ) ) return -1; - x264_nal_end( h ); + if( x264_nal_end( h ) ) + return -1; } /* generate sequence parameters */ x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); x264_sps_write( &h->out.bs, h->sps ); - x264_nal_end( h ); + if( x264_nal_end( h ) ) + 
return -1; /* generate picture parameters */ x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); x264_pps_write( &h->out.bs, h->pps ); - x264_nal_end( h ); + if( x264_nal_end( h ) ) + return -1; } /* Write frame */ @@ -1781,9 +1898,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, x264_thread_sync_stat( h, h->thread[0] ); /* Slice stat */ - h->stat.i_slice_count[h->sh.i_type]++; - h->stat.i_slice_size[h->sh.i_type] += h->out.i_frame_size + NALU_OVERHEAD; - h->stat.f_slice_qp[h->sh.i_type] += h->fdec->f_qp_avg_aq; + h->stat.i_frame_count[h->sh.i_type]++; + h->stat.i_frame_size[h->sh.i_type] += h->out.i_frame_size + h->out.i_nal*NALU_OVERHEAD; + h->stat.f_frame_qp[h->sh.i_type] += h->fdec->f_qp_avg_aq; for( i = 0; i < X264_MBTYPE_MAX; i++ ) h->stat.i_mb_count[h->sh.i_type][i] += h->stat.frame.i_mb_count[i]; @@ -1930,17 +2047,17 @@ void x264_encoder_close ( x264_t *h ) static const char *slice_name[] = { "P", "B", "I", "SP", "SI" }; int i_slice = slice_order[i]; - if( h->stat.i_slice_count[i_slice] > 0 ) + if( h->stat.i_frame_count[i_slice] > 0 ) { - const int i_count = h->stat.i_slice_count[i_slice]; + const int i_count = h->stat.i_frame_count[i_slice]; if( h->param.analyse.b_psnr ) { x264_log( h, X264_LOG_INFO, - "slice %s:%-5d Avg QP:%5.2f size:%6.0f PSNR Mean Y:%5.2f U:%5.2f V:%5.2f Avg:%5.2f Global:%5.2f\n", + "frame %s:%-5d Avg QP:%5.2f size:%6.0f PSNR Mean Y:%5.2f U:%5.2f V:%5.2f Avg:%5.2f Global:%5.2f\n", slice_name[i_slice], i_count, - h->stat.f_slice_qp[i_slice] / i_count, - (double)h->stat.i_slice_size[i_slice] / i_count, + h->stat.f_frame_qp[i_slice] / i_count, + (double)h->stat.i_frame_size[i_slice] / i_count, h->stat.f_psnr_mean_y[i_slice] / i_count, h->stat.f_psnr_mean_u[i_slice] / i_count, h->stat.f_psnr_mean_v[i_slice] / i_count, h->stat.f_psnr_average[i_slice] / i_count, x264_psnr( h->stat.i_ssd_global[i_slice], i_count * i_yuv_size ) ); @@ -1948,15 +2065,15 @@ void x264_encoder_close ( x264_t *h ) else { x264_log( h, 
X264_LOG_INFO, - "slice %s:%-5d Avg QP:%5.2f size:%6.0f\n", + "frame %s:%-5d Avg QP:%5.2f size:%6.0f\n", slice_name[i_slice], i_count, - h->stat.f_slice_qp[i_slice] / i_count, - (double)h->stat.i_slice_size[i_slice] / i_count ); + h->stat.f_frame_qp[i_slice] / i_count, + (double)h->stat.i_frame_size[i_slice] / i_count ); } } } - if( h->param.i_bframe && h->stat.i_slice_count[SLICE_TYPE_P] ) + if( h->param.i_bframe && h->stat.i_frame_count[SLICE_TYPE_P] ) { char *p = buf; int den = 0; @@ -1976,17 +2093,17 @@ void x264_encoder_close ( x264_t *h ) } /* MB types used */ - if( h->stat.i_slice_count[SLICE_TYPE_I] > 0 ) + if( h->stat.i_frame_count[SLICE_TYPE_I] > 0 ) { int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I]; - double i_count = h->stat.i_slice_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0; + double i_count = h->stat.i_frame_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0; x264_print_intra( i_mb_count, i_count, b_print_pcm, buf ); x264_log( h, X264_LOG_INFO, "mb I %s\n", buf ); } - if( h->stat.i_slice_count[SLICE_TYPE_P] > 0 ) + if( h->stat.i_frame_count[SLICE_TYPE_P] > 0 ) { int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_P]; - double i_count = h->stat.i_slice_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0; + double i_count = h->stat.i_frame_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0; int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_P]; x264_print_intra( i_mb_count, i_count, b_print_pcm, buf ); x264_log( h, X264_LOG_INFO, @@ -1999,10 +2116,10 @@ void x264_encoder_close ( x264_t *h ) i_mb_size[PIXEL_4x4] / (i_count*4), i_mb_count[P_SKIP] / i_count ); } - if( h->stat.i_slice_count[SLICE_TYPE_B] > 0 ) + if( h->stat.i_frame_count[SLICE_TYPE_B] > 0 ) { int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_B]; - double i_count = h->stat.i_slice_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0; + double i_count = h->stat.i_frame_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0; double i_mb_list_count; int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_B]; int64_t 
list_count[3] = {0}; /* 0 == L0, 1 == L1, 2 == BI */ @@ -2035,7 +2152,7 @@ void x264_encoder_close ( x264_t *h ) x264_ratecontrol_summary( h ); - if( h->stat.i_slice_count[SLICE_TYPE_I] + h->stat.i_slice_count[SLICE_TYPE_P] + h->stat.i_slice_count[SLICE_TYPE_B] > 0 ) + if( h->stat.i_frame_count[SLICE_TYPE_I] + h->stat.i_frame_count[SLICE_TYPE_P] + h->stat.i_frame_count[SLICE_TYPE_B] > 0 ) { #define SUM3(p) (p[SLICE_TYPE_I] + p[SLICE_TYPE_P] + p[SLICE_TYPE_B]) #define SUM3b(p,o) (p[SLICE_TYPE_I][o] + p[SLICE_TYPE_P][o] + p[SLICE_TYPE_B][o]) @@ -2043,12 +2160,12 @@ void x264_encoder_close ( x264_t *h ) int64_t i_intra = i_i8x8 + SUM3b( h->stat.i_mb_count, I_4x4 ) + SUM3b( h->stat.i_mb_count, I_16x16 ); int64_t i_all_intra = i_intra + SUM3b( h->stat.i_mb_count, I_PCM); - const int i_count = h->stat.i_slice_count[SLICE_TYPE_I] + - h->stat.i_slice_count[SLICE_TYPE_P] + - h->stat.i_slice_count[SLICE_TYPE_B]; + const int i_count = h->stat.i_frame_count[SLICE_TYPE_I] + + h->stat.i_frame_count[SLICE_TYPE_P] + + h->stat.i_frame_count[SLICE_TYPE_B]; int64_t i_mb_count = i_count * h->mb.i_mb_count; float fps = (float) h->param.i_fps_num / h->param.i_fps_den; - float f_bitrate = fps * SUM3(h->stat.i_slice_size) / i_count / 125; + float f_bitrate = fps * SUM3(h->stat.i_frame_size) / i_count / 125; if( h->pps->b_transform_8x8_mode ) { @@ -2058,11 +2175,11 @@ void x264_encoder_close ( x264_t *h ) } if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO - && h->stat.i_slice_count[SLICE_TYPE_B] ) + && h->stat.i_frame_count[SLICE_TYPE_B] ) { x264_log( h, X264_LOG_INFO, "direct mvs spatial:%.1f%% temporal:%.1f%%\n", - h->stat.i_direct_frames[1] * 100. / h->stat.i_slice_count[SLICE_TYPE_B], - h->stat.i_direct_frames[0] * 100. / h->stat.i_slice_count[SLICE_TYPE_B] ); + h->stat.i_direct_frames[1] * 100. / h->stat.i_frame_count[SLICE_TYPE_B], + h->stat.i_direct_frames[0] * 100. 
/ h->stat.i_frame_count[SLICE_TYPE_B] ); } x264_log( h, X264_LOG_INFO, "coded y,uvDC,uvAC intra:%.1f%% %.1f%% %.1f%% inter:%.1f%% %.1f%% %.1f%%\n", @@ -2168,6 +2285,7 @@ void x264_encoder_close ( x264_t *h ) x264_macroblock_cache_end( h->thread[i] ); x264_free( h->thread[i]->out.p_bitstream ); + x264_free( h->thread[i]->out.nal); x264_free( h->thread[i] ); } } diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index d33b00b6..ca19d64a 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -27,8 +27,6 @@ #define _ISOC99_SOURCE #undef NDEBUG // always check asserts, the speed effect is far too small to disable them #include -#include -#include #include "common/common.h" #include "common/cpu.h" @@ -1052,7 +1050,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) update_predictor( rc->row_pred, qp2qscale(rc->qpm), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] ); /* tweak quality based on difference from predicted size */ - if( y < h->sps->i_mb_height-1 && h->stat.i_slice_count[h->sh.i_type] > 0 ) + if( y < h->sps->i_mb_height-1 && h->stat.i_frame_count[h->sh.i_type] > 0 ) { int prev_row_qp = h->fdec->i_row_qp[y]; int b0 = predict_row_size_sum( h, y, rc->qpm ); @@ -1127,8 +1125,8 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num ) * So just calculate the average QP used so far. */ int i; - h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24 - : 1 + h->stat.f_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P]; + h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 
24 + : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P]; rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 ); rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 ); rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 ); @@ -1572,9 +1570,9 @@ static float rate_estimate_qscale( x264_t *h ) int pict_type = h->sh.i_type; double lmin = rcc->lmin[pict_type]; double lmax = rcc->lmax[pict_type]; - int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I] - + h->stat.i_slice_size[SLICE_TYPE_P] - + h->stat.i_slice_size[SLICE_TYPE_B]); + int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I] + + h->stat.i_frame_size[SLICE_TYPE_P] + + h->stat.i_frame_size[SLICE_TYPE_B]); if( rcc->b_2pass ) { diff --git a/encoder/slicetype.c b/encoder/slicetype.c index 570d2aaa..af74427d 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -22,7 +22,6 @@ *****************************************************************************/ #include -#include #include "common/common.h" #include "common/cpu.h" diff --git a/x264.c b/x264.c index 32e65705..b7083c02 100644 --- a/x264.c +++ b/x264.c @@ -196,6 +196,10 @@ static void Help( x264_param_t *defaults, int b_longhelp ) H1( " --no-deblock Disable loop filter\n" ); H0( " -f, --deblock Loop filter AlphaC0 and Beta parameters [%d:%d]\n", defaults->i_deblocking_filter_alphac0, defaults->i_deblocking_filter_beta ); + H1( " --slices Number of slices per frame; forces rectangular\n" + " slices and is overridden by other slicing options\n" ); + H1( " --slice-max-size Limit the size of each slice in bytes\n"); + H1( " --slice-max-mbs Limit the size of each slice in macroblocks\n"); H0( " --interlaced Enable pure-interlaced mode\n" ); H0( "\n" ); H0( "Ratecontrol:\n" ); @@ -459,6 +463,9 @@ static 
struct option long_options[] = { "zones", required_argument, NULL, 0 }, { "qpfile", required_argument, NULL, OPT_QPFILE }, { "threads", required_argument, NULL, 0 }, + { "slice-max-size", required_argument, NULL, 0 }, + { "slice-max-mbs", required_argument, NULL, 0 }, + { "slices", required_argument, NULL, 0 }, { "thread-input", no_argument, NULL, OPT_THREAD_INPUT }, { "non-deterministic", no_argument, NULL, 0 }, { "psnr", no_argument, NULL, 0 }, @@ -1030,13 +1037,13 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame ) { file_pos = ftell( opt->qpfile ); ret = fscanf( opt->qpfile, "%d %c %d\n", &num, &type, &qp ); - if( num > i_frame || ret == EOF ) - { - pic->i_type = X264_TYPE_AUTO; - pic->i_qpplus1 = 0; - fseek( opt->qpfile , file_pos , SEEK_SET ); - break; - } + if( num > i_frame || ret == EOF ) + { + pic->i_type = X264_TYPE_AUTO; + pic->i_qpplus1 = 0; + fseek( opt->qpfile , file_pos , SEEK_SET ); + break; + } if( num < i_frame && ret == 3 ) continue; pic->i_qpplus1 = qp+1; diff --git a/x264.h b/x264.h index 7fa508d1..6609a32b 100644 --- a/x264.h +++ b/x264.h @@ -35,7 +35,7 @@ #include -#define X264_BUILD 72 +#define X264_BUILD 73 /* x264_t: * opaque handler for encoder */ @@ -298,6 +298,11 @@ typedef struct x264_param_t int b_repeat_headers; /* put SPS/PPS before each keyframe */ int i_sps_id; /* SPS and PPS id number */ + /* Slicing parameters */ + int i_slice_max_size; /* Max size per slice in bytes; includes estimated NAL overhead. */ + int i_slice_max_mbs; /* Max number of MBs per slice; overrides i_slice_count. */ + int i_slice_count; /* Number of slices per frame: forces rectangular slices. */ + /* Optional callback for freeing this x264_param_t when it is done being used. * Only used when the x264_param_t sits in memory for an indefinite period of time, * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones. -- 2.40.0