From d020c4274edab45314c6bcf324d05f21dd13a93c Mon Sep 17 00:00:00 2001 From: Lamont Alston Date: Wed, 16 Jun 2010 10:05:17 -0700 Subject: [PATCH] Open-GOP support Allows B-frames immediately prior to keyframes (in display order). This helps reduce keyframe popping and improve compression with short keyframe intervals. Due to a staggering display of braindamage in the Blu-ray spec, two open-GOP modes are available. The two modes calculate keyframe interval differently: one based on coded distance and one based on display distance. The latter is superior compression-wise, but for no comprehensible reason, Blu-ray requires the former if open-GOP is used. --- common/common.c | 13 ++++++++-- common/common.h | 6 ++++- encoder/encoder.c | 45 ++++++++++++++++++++++------------ encoder/ratecontrol.c | 1 + encoder/slicetype.c | 57 ++++++++++++++++++++++++++++++++----------- x264.c | 11 ++++++++- x264.h | 8 +++++- 7 files changed, 107 insertions(+), 34 deletions(-) diff --git a/common/common.c b/common/common.c index 2a9c76e4..4612bb55 100644 --- a/common/common.c +++ b/common/common.c @@ -676,6 +676,15 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) p->i_bframe_pyramid = atoi(value); } } + OPT("open-gop") + { + b_error |= parse_enum( value, x264_open_gop_names, &p->i_open_gop ); + if( b_error ) + { + b_error = 0; + p->i_open_gop = atoi(value); + } + } OPT("nf") p->b_deblocking_filter = !atobool(value); OPT2("filter", "deblock") @@ -1190,9 +1199,9 @@ char *x264_param2string( x264_param_t *p, int b_res ) s += sprintf( s, " bframes=%d", p->i_bframe ); if( p->i_bframe ) { - s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d", + s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d", p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias, - p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred ); + p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->i_open_gop ); } s += 
sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 ); diff --git a/common/common.h b/common/common.h index 60899fe8..dfa11215 100644 --- a/common/common.h +++ b/common/common.h @@ -471,7 +471,11 @@ struct x264_t /* frames used for reference + sentinels */ x264_frame_t *reference[16+2]; - int i_last_keyframe; /* Frame number of the last keyframe */ + int i_last_keyframe; /* Frame number of the last keyframe */ + int i_last_idr; /* Frame number of the last IDR (not RP)*/ + int i_poc_last_open_gop; /* Poc of the I frame of the last open-gop. The value + * is only assigned during the period between that + * I frame and the next P or I frame, else -1 */ int i_input; /* Number of input frames already accepted */ diff --git a/encoder/encoder.c b/encoder/encoder.c index 2791b324..73e4238c 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -573,12 +573,10 @@ static int x264_validate_parameters( x264_t *h ) x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" ); h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL; } - h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX ); + h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) ); + h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER ); if( h->param.i_keyint_max == 1 ) - { - h->param.i_bframe = 0; h->param.b_intra_refresh = 0; - } h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 ); if( h->param.i_bframe <= 1 ) h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE; @@ -588,6 +586,7 @@ static int x264_validate_parameters( x264_t *h ) h->param.i_bframe_adaptive = X264_B_ADAPT_NONE; h->param.analyse.i_direct_mv_pred = 0; h->param.analyse.b_weighted_bipred = 0; + h->param.i_open_gop = X264_OPEN_GOP_NONE; } if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL ) { @@ -599,6 +598,11 @@ static 
int x264_validate_parameters( x264_t *h ) x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" ); h->param.i_frame_reference = 1; } + if( h->param.b_intra_refresh && h->param.i_open_gop ) + { + x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" ); + h->param.i_open_gop = X264_OPEN_GOP_NONE; + } if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO ) h->param.i_keyint_min = h->param.i_keyint_max / 10; h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 ); @@ -978,9 +982,11 @@ x264_t *x264_encoder_open( x264_param_t *param ) h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0; h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8); + h->frames.i_last_idr = h->frames.i_last_keyframe = - h->param.i_keyint_max; h->frames.i_input = 0; h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1; + h->frames.i_poc_last_open_gop = -1; CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) ); /* Allocate room for max refs plus a few extra just in case. */ @@ -1688,35 +1694,37 @@ static inline void x264_reference_hierarchy_reset( x264_t *h ) { int ref; int b_hasdelayframe = 0; - if( !h->param.i_bframe_pyramid ) - return; /* look for delay frames -- chain must only contain frames that are disposable */ for( int i = 0; h->frames.current[i] && IS_DISPOSABLE( h->frames.current[i]->i_type ); i++ ) b_hasdelayframe |= h->frames.current[i]->i_coded != h->frames.current[i]->i_frame + h->sps->vui.i_num_reorder_frames; - if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe ) + /* This function must handle b-pyramid and clear frames for open-gop */ + if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe && h->frames.i_poc_last_open_gop == -1 ) return; /* Remove last BREF. 
There will never be old BREFs in the * dpb during a BREF decode when pyramid == STRICT */ for( ref = 0; h->frames.reference[ref]; ref++ ) { - if( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT + if( ( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT && h->frames.reference[ref]->i_type == X264_TYPE_BREF ) + || ( h->frames.reference[ref]->i_poc < h->frames.i_poc_last_open_gop + && h->sh.i_type != SLICE_TYPE_B ) ) { int diff = h->i_frame_num - h->frames.reference[ref]->i_frame_num; h->sh.mmco[h->sh.i_mmco_command_count].i_difference_of_pic_nums = diff; h->sh.mmco[h->sh.i_mmco_command_count++].i_poc = h->frames.reference[ref]->i_poc; - x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) ); + x264_frame_push_unused( h, x264_frame_shift( &h->frames.reference[ref] ) ); h->b_ref_reorder[0] = 1; - break; + ref--; } } - /* Prepare to room in the dpb for the delayed display time of the later b-frame's */ - h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 ); + /* Prepare room in the dpb for the delayed display time of the later b-frame's */ + if( h->param.i_bframe_pyramid ) + h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 ); } static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp ) @@ -2321,12 +2329,17 @@ int x264_encoder_encode( x264_t *h, { h->frames.i_last_keyframe = h->fenc->i_frame; if( h->fenc->i_type == X264_TYPE_IDR ) + { h->i_frame_num = 0; + h->frames.i_last_idr = h->fenc->i_frame; + } } h->sh.i_mmco_command_count = h->sh.i_mmco_remove_from_end = 0; h->b_ref_reorder[0] = h->b_ref_reorder[1] = 0; + h->fdec->i_poc = + h->fenc->i_poc = 2 * ( h->fenc->i_frame - X264_MAX( h->frames.i_last_idr, 0 ) ); /* ------------------- Setup frame context ----------------------------- */ /* 5: Init data dependent of frame type */ @@ -2337,6 +2350,7 @@ int x264_encoder_encode( x264_t *h, i_nal_ref_idc = NAL_PRIORITY_HIGHEST; h->sh.i_type = SLICE_TYPE_I; x264_reference_reset( h ); + 
h->frames.i_poc_last_open_gop = -1; } else if( h->fenc->i_type == X264_TYPE_I ) { @@ -2344,6 +2358,8 @@ int x264_encoder_encode( x264_t *h, i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/ h->sh.i_type = SLICE_TYPE_I; x264_reference_hierarchy_reset( h ); + if( h->param.i_open_gop ) + h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1; } else if( h->fenc->i_type == X264_TYPE_P ) { @@ -2351,6 +2367,7 @@ int x264_encoder_encode( x264_t *h, i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/ h->sh.i_type = SLICE_TYPE_P; x264_reference_hierarchy_reset( h ); + h->frames.i_poc_last_open_gop = -1; } else if( h->fenc->i_type == X264_TYPE_BREF ) { @@ -2366,8 +2383,6 @@ int x264_encoder_encode( x264_t *h, h->sh.i_type = SLICE_TYPE_B; } - h->fdec->i_poc = - h->fenc->i_poc = 2 * (h->fenc->i_frame - h->frames.i_last_keyframe); h->fdec->i_type = h->fenc->i_type; h->fdec->i_frame = h->fenc->i_frame; h->fenc->b_kept_as_ref = @@ -2484,7 +2499,7 @@ int x264_encoder_encode( x264_t *h, if( h->fenc->i_type != X264_TYPE_IDR ) { - int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe; + int time_to_recovery = h->param.i_open_gop ? 
0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe; x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery ); x264_nal_end( h ); diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index adb5bbba..a588de90 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -724,6 +724,7 @@ int x264_ratecontrol_new( x264_t *h ) CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid ); CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh ); CMP_OPT_FIRST_PASS( "keyint", h->param.i_keyint_max ); + CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop ); if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR ) x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" ); diff --git a/encoder/slicetype.c b/encoder/slicetype.c index 0717c9c5..ecd460f1 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -1058,7 +1058,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe ) { x264_mb_analysis_t a; x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, }; - int num_frames, orig_num_frames, keyint_limit, idr_frame_type, framecnt; + int num_frames, orig_num_frames, keyint_limit, framecnt; int i_mb_count = NUM_MBS; int cost1p0, cost2p0, cost1b1, cost2p1; int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX ); @@ -1080,7 +1080,6 @@ void x264_slicetype_analyse( x264_t *h, int keyframe ) orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit ); x264_lowres_context_init( h, &a ); - idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ? 
X264_TYPE_IDR : X264_TYPE_I; /* This is important psy-wise: if we have a non-scenecut keyframe, * there will be significant visual artifacts if the frames just before @@ -1092,12 +1091,12 @@ void x264_slicetype_analyse( x264_t *h, int keyframe ) { frames[1]->i_type = X264_TYPE_P; if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) ) - frames[1]->i_type = idr_frame_type; + frames[1]->i_type = X264_TYPE_I; return; } else if( num_frames == 0 ) { - frames[1]->i_type = idr_frame_type; + frames[1]->i_type = X264_TYPE_I; return; } @@ -1106,7 +1105,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe ) int reset_start; if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) ) { - frames[1]->i_type = idr_frame_type; + frames[1]->i_type = X264_TYPE_I; return; } @@ -1210,15 +1209,19 @@ void x264_slicetype_analyse( x264_t *h, int keyframe ) /* Enforce keyframe limit. */ if( !h->param.b_intra_refresh ) - for( int j = 0; j < num_frames; j++ ) + for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max ) { - if( ((j-keyint_limit) % h->param.i_keyint_max) == 0 ) + int j = i; + if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER ) { - if( j && h->param.i_keyint_max > 1 ) - frames[j]->i_type = X264_TYPE_P; - frames[j+1]->i_type = X264_TYPE_IDR; - reset_start = X264_MIN( reset_start, j+2 ); + while( IS_X264_TYPE_B( frames[i]->i_type ) ) + i++; + while( IS_X264_TYPE_B( frames[j-1]->i_type ) ) + j--; } + frames[i]->i_type = X264_TYPE_I; + reset_start = X264_MIN( reset_start, i+1 ); + i = j; } if( h->param.rc.i_vbv_buffer_size ) @@ -1303,13 +1306,39 @@ void x264_slicetype_decide( x264_t *h ) frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference ); } + if( frm->i_type == X264_TYPE_KEYFRAME ) + frm->i_type = h->param.i_open_gop ? 
X264_TYPE_I : X264_TYPE_IDR; + /* Limit GOP size */ if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max ) { - if( frm->i_type == X264_TYPE_AUTO ) + if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I ) + frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR; + int warn = frm->i_type != X264_TYPE_IDR; + if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER ) + warn &= frm->i_type != X264_TYPE_I && frm->i_type != X264_TYPE_KEYFRAME; + if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER ) + { + /* if this minigop ends with i, it's not a violation */ + int j = bframes; + while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) ) + j++; + warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME; + } + if( warn ) + x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame ); + } + if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ) + { + if( h->param.i_open_gop ) + { + h->lookahead->i_last_keyframe = frm->i_frame; // Use display order + if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER ) + h->lookahead->i_last_keyframe -= bframes; // Use coded order + frm->b_keyframe = 1; + } + else frm->i_type = X264_TYPE_IDR; - if( frm->i_type != X264_TYPE_IDR ) - x264_log( h, X264_LOG_WARNING, "specified frame type (%d) is not compatible with keyframe interval\n", frm->i_type ); } if( frm->i_type == X264_TYPE_IDR ) { diff --git a/x264.c b/x264.c index 09bad61c..87225651 100644 --- a/x264.c +++ b/x264.c @@ -380,6 +380,12 @@ static void Help( x264_param_t *defaults, int longhelp ) " - strict: Strictly hierarchical pyramid\n" " - normal: Non-strict (not Blu-ray compatible)\n", strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) 
); + H1( " --open-gop Use recovery points to close GOPs [none]\n" + " - none: Use standard closed GOPs\n" + " - display: Base GOP length on display order\n" + " (not Blu-ray compatible)\n" + " - coded: Base GOP length on coded order\n" + " Only available with b-frames\n" ); H1( " --no-cabac Disable CABAC\n" ); H1( " -r, --ref Number of reference frames [%d]\n", defaults->i_frame_reference ); H1( " --no-deblock Disable loop filter\n" ); @@ -441,7 +447,8 @@ static void Help( x264_param_t *defaults, int longhelp ) " or b= (bitrate multiplier)\n" ); H2( " --qpfile Force frametypes and QPs for some or all frames\n" " Format of each line: framenumber frametype QP\n" - " QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n" + " QP of -1 lets x264 choose. Frametypes: I,i,K,P,B,b.\n" + " K= depending on open-gop setting\n" " QPs are restricted by qpmin/qpmax.\n" ); H1( "\n" ); H1( "Analysis:\n" ); @@ -627,6 +634,7 @@ static struct option long_options[] = { "no-b-adapt", no_argument, NULL, 0 }, { "b-bias", required_argument, NULL, 0 }, { "b-pyramid", required_argument, NULL, 0 }, + { "open-gop", required_argument, NULL, 0 }, { "min-keyint", required_argument, NULL, 'i' }, { "keyint", required_argument, NULL, 'I' }, { "intra-refresh", no_argument, NULL, 0 }, @@ -1305,6 +1313,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame ) pic->i_qpplus1 = qp+1; if ( type == 'I' ) pic->i_type = X264_TYPE_IDR; else if( type == 'i' ) pic->i_type = X264_TYPE_I; + else if( type == 'K' ) pic->i_type = X264_TYPE_KEYFRAME; else if( type == 'P' ) pic->i_type = X264_TYPE_P; else if( type == 'B' ) pic->i_type = X264_TYPE_BREF; else if( type == 'b' ) pic->i_type = X264_TYPE_B; diff --git a/x264.h b/x264.h index 9cd4600d..09183fde 100644 --- a/x264.h +++ b/x264.h @@ -35,7 +35,7 @@ #include -#define X264_BUILD 98 +#define X264_BUILD 99 /* x264_t: * opaque handler for encoder */ @@ -104,6 +104,9 @@ typedef struct x264_t x264_t; #define X264_B_PYRAMID_STRICT 1 #define 
X264_B_PYRAMID_NORMAL 2 #define X264_KEYINT_MIN_AUTO 0 +#define X264_OPEN_GOP_NONE 0 +#define X264_OPEN_GOP_DISPLAY_ORDER 1 +#define X264_OPEN_GOP_CODED_ORDER 2 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 }; static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 }; @@ -115,6 +118,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", " static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 }; static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 }; static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 }; +static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 }; /* Colorspace type * legacy only; nothing other than I420 is really supported. */ @@ -138,6 +142,7 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 }; #define X264_TYPE_P 0x0003 #define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */ #define X264_TYPE_B 0x0005 +#define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on i_open_gop option */ #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR) #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF) @@ -221,6 +226,7 @@ typedef struct x264_param_t int i_bframe_adaptive; int i_bframe_bias; int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */ + int i_open_gop; /* Open gop: 0=none, 1=display order, 2=coded order to determine gop size */ int b_deblocking_filter; int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */ -- 2.40.0