On by default; can be turned off with --no-mbtree.
Uses a large lookahead to track temporal propagation of data and weight quality accordingly.
Requires a very large separate statsfile (2 bytes per macroblock) in multi-pass mode.
Doesn't work with b-pyramid yet.
Note that MB-tree inherently measures quality differently from the standard qcomp method, so bitrates produced by CRF may change somewhat.
This makes the "medium" preset a bit slower. Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast".
All presets "fast" and above will have MB-tree on.
Add a new option, --rc-lookahead, to control how far ahead (in frames) MB-tree looks when performing propagation analysis.
Default is 40; larger values will be slower and require more memory but give more accurate results.
This value will be used in the future to control ratecontrol lookahead (VBV).
Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM.
This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters.
Quality improvement from MB-tree is about 2-70% depending on content.
Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength.
Note that MB-tree may perform slightly suboptimally on fades; this will be fixed by weighted prediction, which is coming soon.
param->rc.f_pb_factor = 1.3;
param->rc.i_aq_mode = X264_AQ_VARIANCE;
param->rc.f_aq_strength = 1.0;
+ param->rc.i_lookahead = 40;
param->rc.b_stat_write = 0;
param->rc.psz_stat_out = "x264_2pass.log";
param->rc.f_qblur = 0.5;
param->rc.f_complexity_blur = 20;
param->rc.i_zones = 0;
+ param->rc.b_mb_tree = 1;
/* Log */
param->pf_log = x264_log_default;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
param->analyse.i_me_method = X264_ME_HEX;
param->analyse.f_psy_rd = 1.0;
+ param->analyse.b_psy = 1;
param->analyse.f_psy_trellis = 0;
param->analyse.i_me_range = 16;
param->analyse.i_subpel_refine = 7;
p->analyse.f_psy_trellis = 0;
}
}
+ OPT("psy")
+ p->analyse.b_psy = atobool(value);
OPT("chroma-me")
p->analyse.b_chroma_me = atobool(value);
OPT("mixed-refs")
p->rc.f_rf_constant = atof(value);
p->rc.i_rc_method = X264_RC_CRF;
}
+ OPT("rc-lookahead")
+ p->rc.i_lookahead = atoi(value);
OPT2("qpmin", "qp-min")
p->rc.i_qp_min = atoi(value);
OPT2("qpmax", "qp-max")
}
OPT("qcomp")
p->rc.f_qcompress = atof(value);
+ OPT("mbtree")
+ p->rc.b_mb_tree = atobool(value);
OPT("qblur")
p->rc.f_qblur = atof(value);
OPT2("cplxblur", "cplx-blur")
s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
- s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
+ s += sprintf( s, " psy=%d", p->analyse.b_psy );
+ if( p->analyse.b_psy )
+ s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
- s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
+ if( p->rc.b_mb_tree )
+ s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
+
+ s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
- : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
+ : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
{
if( p->rc.i_rc_method == X264_RC_CRF )
if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
{
s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
- if( p->i_bframe )
+ if( p->i_bframe && !p->rc.b_mb_tree )
s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
if( p->rc.i_aq_mode )
#define X264_SLICE_MAX 4
#define X264_NAL_MAX (4 + X264_SLICE_MAX)
#define X264_PCM_COST (386*8)
+#define X264_LOOKAHEAD_MAX 250
// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
return amvd0 + (amvd1<<16);
}
+/* Fractional part of 2^t in 8-bit fixed point, sampled at 64 steps per
+ * octave: entry k is approximately 256 * (2^((k+0.5)/64) - 1). */
+static const uint8_t exp2_lut[64] = {
+      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
+     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
+    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
+    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
+};
+
+/* Table-driven approximation of 2^x, returned as an integer scaled by 256
+ * (8 fractional bits).
+ *   x <= -8  yields 0;
+ *   x >=  8  saturates to 0xffff. */
+static ALWAYS_INLINE int x264_exp2fix8( float x )
+{
+    /* Bias by 8 so that the useful input range becomes (0,16). */
+    float biased = x + 8;
+    int whole, frac;
+
+    if( biased <= 0 )
+        return 0;
+    if( biased >= 16 )
+        return 0xffff;
+
+    whole = biased;                 /* integer part selects the shift */
+    frac = (biased - whole) * 64;   /* fractional part selects the LUT slot */
+
+    /* (256 + lut) is the mantissa in 8.8; shifting left by the biased integer
+     * part and then right by 8 removes the bias again. */
+    return (exp2_lut[frac]+256) << whole >> 8;
+}
+
+/* log2 of the mantissa, sampled at 128 steps: entry k is log2(1 + k/128). */
+static const float log2_lut[128] = {
+    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
+    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
+    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
+    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
+    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
+    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
+    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
+    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
+    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
+    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
+    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
+    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
+    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
+    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
+    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
+    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
+};
+
+/* Table-driven approximation of log2(x) for a 32-bit unsigned integer.
+ * NOTE(review): x is presumably expected to be non-zero, since x264_clz may
+ * expand to __builtin_clz, whose result is undefined for 0 -- confirm at the
+ * call sites. */
+static ALWAYS_INLINE float x264_log2( uint32_t x )
+{
+    int lz = x264_clz( x );
+    /* Normalise so the leading one sits in bit 31; the seven bits right below
+     * it (bits 30..24) then index the mantissa table. */
+    uint32_t normalized = x << lz;
+    int mantissa_idx = (normalized >> 24) & 0x7f;
+    /* (31 - lz) is the position of the leading one, i.e. floor(log2(x)). */
+    return log2_lut[mantissa_idx] + (31 - lz);
+}
+
/****************************************************************************
*
****************************************************************************/
struct
{
/* Frames to be encoded (whose types have been decided) */
- x264_frame_t *current[X264_BFRAME_MAX*4+3];
+ x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
/* Temporary buffer (frames types not yet decided) */
- x264_frame_t *next[X264_BFRAME_MAX*4+3];
+ x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
/* Unused frames */
- x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4];
+ x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
/* For adaptive B decision */
x264_frame_t *last_nonb;
memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
}
+ CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
+ memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) );
+ CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
+ for( j = 0; j <= h->param.i_bframe+1; j++ )
+ for( i = 0; i <= h->param.i_bframe+1; i++ )
+ {
+ CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );
+ CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) );
+ }
}
if( h->param.analyse.i_me_method >= X264_ME_ESA )
CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
if( h->param.i_bframe )
{
CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
int8_t *mb_type;
int16_t (*mv[2])[2];
int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
+ uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
+ uint8_t (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
int *lowres_mv_costs[2][X264_BFRAME_MAX+1];
int8_t *ref[2];
int i_ref[2];
float *f_qp_offset;
int b_intra_calculated;
uint16_t *i_intra_cost;
+ uint32_t *i_propagate_cost;
uint16_t *i_inv_qscale_factor;
/* threading */
#ifdef WORDS_BIGENDIAN
#define endian_fix(x) (x)
#define endian_fix32(x) (x)
-#elif defined(__GNUC__) && defined(HAVE_MMX)
+#define endian_fix16(x) (x)
+#else
+#if defined(__GNUC__) && defined(HAVE_MMX)
static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
{
asm("bswap %0":"+r"(x));
return endian_fix32(x);
}
#endif
+/* Swap the two bytes of a 16-bit value.  This definition sits in the
+ * non-WORDS_BIGENDIAN branch; with WORDS_BIGENDIAN defined, endian_fix16 is
+ * an identity macro instead. */
+static ALWAYS_INLINE uint16_t endian_fix16( uint16_t x )
+{
+    uint16_t low_to_high = x << 8;   /* truncation drops the old high byte */
+    uint16_t high_to_low = x >> 8;
+    return low_to_high | high_to_low;
+}
+#endif
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 3)
#define x264_clz(x) __builtin_clz(x)
h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
}
h->mb.i_psy_rd_lambda = a->i_lambda;
- /* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */
- h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
+ /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
+ h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
h->mb.i_me_method = h->param.analyse.i_me_method;
h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
#define bs_write_ue bs_write_ue_big
-static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
+static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
x264_nal_t **pp_nal, int *pi_nal,
x264_picture_t *pic_out );
h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
h->param.rc.i_aq_mode = 0;
+ h->param.rc.b_mb_tree = 0;
}
h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
if( !h->param.i_bframe )
h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0;
+ h->param.rc.i_lookahead = x264_clip3( h->param.rc.i_lookahead, 0, X264_LOOKAHEAD_MAX );
+ h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, h->param.i_keyint_max );
+ if( h->param.rc.b_stat_read )
+ h->param.rc.i_lookahead = 0;
+ else if( !h->param.rc.i_lookahead )
+ h->param.rc.b_mb_tree = 0;
+ if( h->param.rc.f_qcompress == 1 )
+ h->param.rc.b_mb_tree = 0;
+
h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
&& h->param.i_bframe
&& ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read );
if( !h->param.b_cabac )
h->param.analyse.i_trellis = 0;
h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
+ if( !h->param.analyse.b_psy )
+ {
+ h->param.analyse.f_psy_rd = 0;
+ h->param.analyse.f_psy_trellis = 0;
+ }
if( !h->param.analyse.i_trellis )
h->param.analyse.f_psy_trellis = 0;
h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
if( h->param.rc.f_aq_strength == 0 )
h->param.rc.i_aq_mode = 0;
+ /* MB-tree requires AQ to be on, even if the strength is zero. */
+ if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
+ {
+ h->param.rc.i_aq_mode = 1;
+ h->param.rc.f_aq_strength = 0;
+ }
+ if( h->param.rc.b_mb_tree && h->param.b_bframe_pyramid )
+ {
+ x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
+ h->param.b_bframe_pyramid = 0;
+ }
h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
h->param.analyse.i_subpel_refine = 9;
h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1;
else
h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
+ if( h->param.rc.b_mb_tree )
+ h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
+
h->frames.i_max_ref0 = h->param.i_frame_reference;
h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering;
&& ( h->param.rc.i_rc_method == X264_RC_ABR
|| h->param.rc.i_rc_method == X264_RC_CRF
|| h->param.i_bframe_adaptive
- || h->param.i_scenecut_threshold );
+ || h->param.i_scenecut_threshold
+ || h->param.rc.b_mb_tree );
h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
if( h->frames.b_have_lowres )
x264_frame_init_lowres( h, fenc );
- if( h->param.rc.i_aq_mode )
+ if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
+ {
+ if( x264_macroblock_tree_read( h, fenc ) )
+ return -1;
+ }
+ else if( h->param.rc.i_aq_mode )
x264_adaptive_quant_frame( h, fenc );
if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
/* 2: Select frame types */
if( h->frames.next[0] == NULL )
{
- x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
+ if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
+ return -1;
return 0;
}
else
x264_slices_write( h );
- x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
+ if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
+ return -1;
return 0;
}
-static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
+static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
x264_nal_t **pp_nal, int *pi_nal,
x264_picture_t *pic_out )
{
if( !h->out.i_nal )
{
pic_out->i_type = X264_TYPE_AUTO;
- return;
+ return 0;
}
x264_frame_push_unused( thread_current, h->fenc );
/* update rc */
x264_emms();
- x264_ratecontrol_end( h, h->out.i_frame_size * 8 );
+ if( x264_ratecontrol_end( h, h->out.i_frame_size * 8 ) < 0 )
+ return -1;
/* restore CPU state (before using float again) */
x264_emms();
if( h->param.psz_dump_yuv )
x264_frame_dump( h );
+
+ return 0;
}
static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra )
double fps;
double bitrate;
double rate_tolerance;
+ double qcompress;
int nmb; /* number of macroblocks in a frame */
int qp_constant[5];
/* 2pass stuff */
FILE *p_stat_file_out;
char *psz_stat_file_tmpname;
+ FILE *p_mbtree_stat_file_out;
+ char *psz_mbtree_stat_file_tmpname;
+ char *psz_mbtree_stat_file_name;
+ FILE *p_mbtree_stat_file_in;
int num_entries; /* number of ratecontrol_entry_ts */
ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
double lmin[5]; /* min qscale by frame type */
double lmax[5];
double lstep; /* max change (multiply) in qscale per frame */
+ uint16_t *qp_buffer; /* Global buffer for converting MB-tree quantizer data. */
/* MBRC stuff */
double frame_size_estimated;
return var;
}
-static const float log2_lut[128] = {
- 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
- 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
- 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
- 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
- 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
- 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
- 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
- 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
- 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
- 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
- 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
- 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
- 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
- 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
- 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
- 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
-};
-
-static const uint8_t exp2_lut[64] = {
- 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47,
- 50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104,
- 108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
- 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
-};
-
-static ALWAYS_INLINE float x264_log2( uint32_t x )
-{
- int lz = x264_clz( x );
- return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
-}
-
-static ALWAYS_INLINE int x264_exp2fix8( float x )
-{
- int i, f;
- x += 8;
- if( x <= 0 ) return 0;
- if( x >= 16 ) return 0xffff;
- i = x;
- f = (x-i)*64;
- return (exp2_lut[f]+256) << i >> 8;
-}
-
void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
{
/* constants chosen to result in approximately the same overall bitrate as without AQ.
int mb_x, mb_y;
float strength;
float avg_adj = 0.f;
+ /* Need to init it anyways for MB tree. */
+ if( h->param.rc.f_aq_strength == 0 )
+ {
+ int mb_xy;
+ memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
+ if( h->frames.b_have_lowres )
+ for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
+ frame->i_inv_qscale_factor[mb_xy] = 256;
+ return;
+ }
+
if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
{
for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
}
else
strength = h->param.rc.f_aq_strength * 1.0397f;
+
for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
{
h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
}
+/* Fill frame->f_qp_offset for a frame when MB-tree data is being read back
+ * from the .mbtree stats file.
+ *
+ * For non-B frames one record is consumed from rc->p_mbtree_stat_file_in:
+ * a one-byte slice type followed by h->mb.i_mb_count 16-bit values holding
+ * big-endian signed 8.8 fixed-point QP offsets.  B-frames consume nothing
+ * from the file and are handed to x264_adaptive_quant_frame() instead.
+ *
+ * Returns 0 on success, or -1 (after logging an error) if the file is
+ * truncated or the stored slice type disagrees with the type recorded in
+ * rc->entry[] for this frame. */
+int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
+{
+    x264_ratecontrol_t *rc = h->rc;
+    uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type;
+    uint8_t i_type;
+    int mb_xy;
+
+    if( i_type_actual == SLICE_TYPE_B )
+    {
+        x264_adaptive_quant_frame( h, frame );
+        return 0;
+    }
+
+    /* Record header: the slice type stored for this frame. */
+    if( fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) != 1 )
+        goto fail;
+
+    if( i_type != i_type_actual )
+    {
+        x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual);
+        return -1;
+    }
+
+    /* Record payload: one 16-bit value per macroblock. */
+    if( fread( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
+        goto fail;
+
+    /* Convert to host byte order, reinterpret as signed, and scale the 8.8
+     * fixed-point value back to a float offset. */
+    for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
+    {
+        int16_t fix8 = (int16_t)endian_fix16( rc->qp_buffer[mb_xy] );
+        frame->f_qp_offset[mb_xy] = ((float)fix8) * (1/256.0);
+    }
+    return 0;
+
+fail:
+    x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
+    return -1;
+}
+
+/* Build a newly allocated string consisting of `input` followed by `suffix`.
+ *
+ * The buffer is obtained from x264_malloc() and is owned by the caller, who
+ * releases it with x264_free().
+ *
+ * Returns NULL if the allocation fails (previously the NULL pointer was
+ * passed straight to strcpy()).  Callers should check the result before
+ * handing it to fopen() or rename(). */
+static char *x264_strcat_filename( char *input, char *suffix )
+{
+    size_t input_len = strlen( input );
+    size_t suffix_len = strlen( suffix );
+    char *output = x264_malloc( input_len + suffix_len + 1 );
+    if( !output )
+        return NULL;
+    memcpy( output, input, input_len );
+    /* suffix_len + 1 also copies the suffix's terminating NUL. */
+    memcpy( output + input_len, suffix, suffix_len + 1 );
+    return output;
+}
+
int x264_ratecontrol_new( x264_t *h )
{
x264_ratecontrol_t *rc;
else
rc->fps = 25.0;
+ if( h->param.rc.b_mb_tree )
+ {
+ h->param.rc.f_pb_factor = 1;
+ rc->qcompress = 1;
+ }
+ else
+ rc->qcompress = h->param.rc.f_qcompress;
+
rc->bitrate = h->param.rc.i_bitrate * 1000.;
rc->rate_tolerance = h->param.rc.f_rate_tolerance;
rc->nmb = h->mb.i_mb_count;
rc->accum_p_norm = .01;
rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
/* estimated ratio that produces a reasonable QP for the first I-frame */
- rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
+ rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 );
rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
rc->last_non_b_pict_type = SLICE_TYPE_I;
}
if( h->param.rc.i_rc_method == X264_RC_CRF )
{
- /* arbitrary rescaling to make CRF somewhat similar to QP */
+ /* Arbitrary rescaling to make CRF somewhat similar to QP.
+ * Try to compensate for MB-tree's effects as well. */
double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
- rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
- / qp2qscale( h->param.rc.f_rf_constant );
+ double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
+ rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
+ / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
}
rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
return -1;
}
+ if( h->param.rc.b_mb_tree )
+ {
+ char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
+ rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
+ x264_free( mbtree_stats_in );
+ if( !rc->p_mbtree_stat_file_in )
+ {
+ x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
+ return -1;
+ }
+ }
/* check whether 1st pass options were compatible with current options */
if( !strncmp( stats_buf, "#options:", 9 ) )
x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" );
return -1;
}
+
+ if( h->param.rc.b_mb_tree && ( p = strstr( opts, "rc_lookahead=" ) ) && sscanf( p, "rc_lookahead=%d", &i ) )
+ h->param.rc.i_lookahead = i;
}
/* find number of pics */
if( h->param.rc.b_stat_write )
{
char *p;
-
- rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
- strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
- strcat( rc->psz_stat_file_tmpname, ".temp" );
+ rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" );
rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
if( rc->p_stat_file_out == NULL )
p = x264_param2string( &h->param, 1 );
fprintf( rc->p_stat_file_out, "#options: %s\n", p );
x264_free( p );
+ if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
+ {
+ rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" );
+ rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" );
+
+ rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
+ if( rc->p_mbtree_stat_file_out == NULL )
+ {
+ x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
+ return -1;
+ }
+ }
+ }
+
+ if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) )
+ {
+ rc->qp_buffer = x264_malloc( h->mb.i_mb_count * sizeof(uint16_t));
+ if( !rc->qp_buffer )
+ return -1;
}
for( i=0; i<h->param.i_threads; i++ )
if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR && rc->cbr_decay > .9999 )
{
double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
+ double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*12.5 : 0;
x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
- qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
- * rc->cplxr_sum / rc->wanted_bits_window ) );
+ qscale2qp( pow( base_cplx, 1 - rc->qcompress )
+ * rc->cplxr_sum / rc->wanted_bits_window ) - mbtree_offset );
}
}
}
x264_free( rc->psz_stat_file_tmpname );
}
+ if( rc->p_mbtree_stat_file_out )
+ {
+ fclose( rc->p_mbtree_stat_file_out );
+ if( h->i_frame >= rc->num_entries )
+ if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
+ {
+ x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
+ rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
+ }
+ x264_free( rc->psz_mbtree_stat_file_tmpname );
+ x264_free( rc->psz_mbtree_stat_file_name );
+ }
x264_free( rc->pred );
x264_free( rc->pred_b_from_p );
x264_free( rc->entry );
+ x264_free( rc->qp_buffer );
if( rc->zones )
{
x264_free( rc->zones[0].param );
}
/* After encoding one frame, save stats and update ratecontrol state */
-void x264_ratecontrol_end( x264_t *h, int bits )
+int x264_ratecontrol_end( x264_t *h, int bits )
{
x264_ratecontrol_t *rc = h->rc;
const int *mbs = h->stat.frame.i_mb_count;
( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
: '-';
- fprintf( rc->p_stat_file_out,
+ if( fprintf( rc->p_stat_file_out,
"in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
h->fenc->i_frame, h->i_frame,
c_type, rc->qpa_rc,
h->stat.frame.i_mb_count_i,
h->stat.frame.i_mb_count_p,
h->stat.frame.i_mb_count_skip,
- c_direct);
+ c_direct) < 0 )
+ goto fail;
+
+ /* Don't re-write the data in multi-pass mode. */
+ if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
+ {
+ uint8_t i_type = h->sh.i_type;
+ int i;
+ /* Values are stored as big-endian FIX8.8 */
+ for( i = 0; i < h->mb.i_mb_count; i++ )
+ rc->qp_buffer[i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
+ if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 )
+ goto fail;
+ if( fwrite( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
+ goto fail;
+ }
}
if( rc->b_abr )
}
update_vbv( h, bits );
+ return 0;
+fail:
+ x264_log(h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n");
+ return -1;
}
/****************************************************************************
double q;
x264_zone_t *zone = get_zone( h, frame_num );
- q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress );
+ q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
// avoid NaN's in the rc_eq
if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0)
void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
void x264_adaptive_quant( x264_t * );
+int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
void x264_ratecontrol_start( x264_t *, int i_force_qp );
int x264_ratecontrol_slice_type( x264_t *, int i_frame );
void x264_ratecontrol_mb( x264_t *, int bits );
int x264_ratecontrol_qp( x264_t * );
-void x264_ratecontrol_end( x264_t *, int bits );
+int x264_ratecontrol_end( x264_t *, int bits );
void x264_ratecontrol_summary( x264_t * );
void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
int x264_ratecontrol_get_estimated_size( x264_t const *);
x264_me_t m[2];
int i_bcost = COST_MAX;
int l, i;
+ int list_used = 0;
h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );
h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
- if( i_bcost > i_cost ) \
- i_bcost = i_cost; \
+ COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
}
m[0].i_pixel = PIXEL_8x8;
int i_cost;
h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
- if( i_bcost > i_cost )
- i_bcost = i_cost;
+ COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
}
}
*(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l];
m[l].cost = *fenc_costs[l];
}
- i_bcost = X264_MIN( i_bcost, m[l].cost );
+ COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
}
if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
TRY_BIDIR( m[0].mv, m[1].mv, 5 );
+ frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy] = list_used;
+
lowres_intra_mb:
/* forbid intra-mbs in B-frames, because it's rare and not worth checking */
/* FIXME: Should we still forbid them now that we cache intra scores? */
- if( !b_bidir )
+ if( !b_bidir || h->param.rc.b_mb_tree )
{
int i_icost, b_intra;
if( !fenc->b_intra_calculated )
}
else
i_icost = fenc->i_intra_cost[i_mb_xy];
- b_intra = i_icost < i_bcost;
- if( b_intra )
- i_bcost = i_icost;
- if( (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
- && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
- || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+ if( !b_bidir )
{
- fenc->i_intra_mbs[b-p0] += b_intra;
- fenc->i_cost_est[0][0] += i_icost;
+ b_intra = i_icost < i_bcost;
+ if( b_intra )
+ i_bcost = i_icost;
+ if( (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
+ && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
+ || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+ {
+ fenc->i_intra_mbs[b-p0] += b_intra;
+ fenc->i_cost_est[0][0] += i_icost;
+ }
}
}
+ frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost;
+
return i_bcost;
}
#undef TRY_BIDIR
x264_frame_t **frames, int p0, int p1, int b,
int b_intra_penalty )
{
+
int i_score = 0;
/* Don't use the AQ'd scores for slicetype decision. */
int i_score_aq = 0;
/* the edge mbs seem to reduce the predictive quality of the
* whole frame's score, but are needed for a spatial distribution. */
- if( h->param.rc.i_vbv_buffer_size || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+ if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
+ h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
{
for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
{
return i_score;
}
-#define MAX_LENGTH (X264_BFRAME_MAX*4)
+/* MB-tree may change the per-macroblock quantizer offsets after lookahead has
+ * already produced its costs.  Rather than re-running lookahead, rebuild the
+ * frame score and the per-row SATD totals from the cached lowres macroblock
+ * costs, each rescaled by the macroblock's current QP offset.
+ *
+ * frames[b] is the frame being scored against references p0/p1; the results are
+ * read from and written to the [b-p0][p1-b] slots of that frame.  The return
+ * value sums only interior macroblocks, unless the frame is at most two
+ * macroblocks wide or tall, in which case every macroblock is counted.
+ * The analysis context `a` is not used here. */
+static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
+                                                  int p0, int p1, int b )
+{
+    x264_frame_t *frame = frames[b];
+    int *row_satd = frame->i_row_satds[b-p0][p1-b];
+    int frame_score = 0;
+    x264_emms();
+    for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
+    {
+        int row_total = 0;
+        for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
+        {
+            int mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
+            float qp_adj = frame->f_qp_offset[mb_xy];
+            int mb_cost = frame->lowres_costs[b-p0][p1-b][mb_xy];
+            int is_interior = h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
+                              h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1;
+
+            /* Scale by 2^(-qp_adj/6); the scale factor is 8.8 fixed point, hence
+             * the rounding add of 128 and the shift by 8. */
+            mb_cost = (mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8;
+            row_total += mb_cost;
+
+            /* Row totals always include edge macroblocks; the frame score only
+             * does so when the frame is too small to have a useful interior. */
+            if( is_interior || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+                frame_score += mb_cost;
+        }
+        row_satd[ h->mb.i_mb_y ] = row_total;
+    }
+    return frame_score;
+}
+
+/* Propagate the estimated information content of frame b back onto the frames
+ * it is predicted from.
+ *
+ * For every inter-coded macroblock of frames[b], the share of its own cost plus
+ * its already-accumulated propagate cost that is attributable to prediction
+ * ((intra-inter)/intra) is added to i_propagate_cost of the macroblocks its
+ * motion vectors point at in frames[p0] (list 0) and/or frames[p1] (list 1).
+ * Each MV generally straddles four macroblocks, so the amount is split between
+ * them with bilinear weights.  Accumulators saturate at 65535.
+ *
+ * h       encoder context; h->mb.i_mb_x / i_mb_y are used as loop counters.
+ * frames  lookahead frame array.
+ * p0, p1  indices of the list-0 and list-1 reference frames.
+ * b       index of the frame being propagated (b == p1 for a P-frame). */
+static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
+{
+    x264_frame_t *refs[2] = {frames[p0],frames[p1]};
+    /* Temporal position of b between p0 and p1 in 8.8 fixed point.  When both
+     * references are the same frame there is no distance to divide by, so fall
+     * back to the midpoint (128), which yields the equal 32/32 bipred split. */
+    int dist_scale_factor = p1 != p0 ? ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0) : 128;
+    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
+
+    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+    {
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+        {
+            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
+            int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
+            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
+            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
+
+            /* Don't propagate for an intra block.  The intra_cost test keeps the
+             * division below well-defined: the scaled intra cost can round down
+             * to zero, and nothing can be propagated from such a block anyway. */
+            if( intra_cost > 0 && inter_cost < intra_cost )
+            {
+                int mv[2][2], list;
+                /* The approximate amount of data that this block contains. */
+                int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
+
+                /* Keep only the fraction that comes from the references.
+                 * Divide by 64 for per-pixel summing. */
+                propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
+
+                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
+                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
+                /* A list-1 MV is only loaded when b is not itself the list-1 frame;
+                 * mv[1] is only read below if lists_used has bit 1 set. */
+                if( b != p1 )
+                {
+                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
+                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
+                }
+
+                /* Follow the MVs to the previous frame(s). */
+                for( list = 0; list < 2; list++ )
+                    if( (lists_used >> list)&1 )
+                    {
+                        int x = mv[list][0];
+                        int y = mv[list][1];
+                        int listamount = propagate_amount;
+                        /* The upper bits of each MV component select the target
+                         * macroblock; the low five bits are the position inside it. */
+                        int mbx = (x>>5)+h->mb.i_mb_x;
+                        int mby = ((y>>5)+h->mb.i_mb_y);
+                        int idx0 = mbx + mby*h->mb.i_mb_stride;
+                        int idx1 = idx0 + 1;
+                        int idx2 = idx0 + h->mb.i_mb_stride;
+                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
+                        /* Bilinear weights for the four covered macroblocks;
+                         * together they always sum to 32*32. */
+                        int idx0weight = (32-(y&31))*(32-(x&31));
+                        int idx1weight = (32-(y&31))*(x&31);
+                        int idx2weight = (y&31)*(32-(x&31));
+                        int idx3weight = (y&31)*(x&31);
+
+                        /* Apply bipred weighting. */
+                        if( lists_used == 3 )
+                            listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6;
+
+/* Saturating accumulate: the running total is capped at 65535. */
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
+
+                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
+                         * be counted. */
+                        if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
+                        {
+                            /* Fast path: all four targets are inside the frame. */
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                        }
+                        else /* Check offsets individually */
+                        {
+                            if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
+                            if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
+                            if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
+                            if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                        }
+                    }
+            }
+        }
+    }
+}
+
+
+/* Run MB-tree over the lookahead window and fold the result into the per-
+ * macroblock QP offsets of the earliest non-B frame that is reached.
+ *
+ * Starting from the last non-B frame within num_frames, the window is walked
+ * backwards one mini-GOP at a time.  For each mini-GOP the lowres costs are
+ * (re)computed and the propagate cost of every frame is pushed onto the frames
+ * it references.  Finally the accumulated propagate cost is converted into a QP
+ * reduction: the more future data depends on a macroblock, the lower its QP.
+ *
+ * h          encoder context.
+ * a          lowres analysis context, passed through to the frame cost function.
+ * frames     lookahead frame array; frame types must already be assigned.
+ * num_frames number of frames to consider.
+ * b_intra    nonzero if frames[0] is an intra frame whose own offsets are to be
+ *            computed; otherwise the walk stops at index 1. */
+static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
+{
+    int i, idx = !b_intra;
+    int last_nonb, cur_nonb = 1;
+    if( b_intra )
+        x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
+
+    /* Find the last non-B frame in the window; trailing B-frames are ignored. */
+    i = num_frames-1;
+    while( i > 0 && frames[i]->i_type == X264_TYPE_B )
+        i--;
+    last_nonb = i;
+
+    /* Only reachable with an empty window (num_frames <= 0). */
+    if( last_nonb < 0 )
+        return;
+
+    /* Nothing lies beyond the last frame, so nothing depends on it yet. */
+    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+    while( i-- > idx )
+    {
+        /* Locate the previous non-B frame; it is list 0 for this mini-GOP. */
+        cur_nonb = i;
+        while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 )
+            cur_nonb--;
+        if( cur_nonb < idx )
+            break;
+        x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 );
+        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+        /* Handle the B-frames first: they add to the propagate cost of BOTH
+         * cur_nonb and last_nonb, and last_nonb's total must be complete before
+         * it is propagated onwards.  Doing it the other way round would discard
+         * everything the B-frames contribute to last_nonb. */
+        while( frames[i]->i_type == X264_TYPE_B && i > 0 )
+        {
+            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
+            /* The loop only ever propagates into non-B frames, so a B-frame starts from zero. */
+            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
+            i--;
+        }
+        /* Now push last_nonb (predicted from cur_nonb only) onto cur_nonb. */
+        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
+        last_nonb = cur_nonb;
+    }
+    x264_emms();
+
+    /* Turn the accumulated propagate cost of the final frame into QP offsets. */
+    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+    {
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+        {
+            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
+            int intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8;
+
+            /* A zero intra cost would make the log ratio undefined; leave the offset alone. */
+            if( intra_cost )
+            {
+                int propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
+                float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost);
+                /* Allow the constant to be adjusted via qcompress, since the two
+                 * concepts are very similar. */
+                frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio;
+            }
+        }
+    }
+}
static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
{
/* Uses strings due to the fact that the speed of the control functions is
negligible compared to the cost of running slicetype_frame_cost, and because
it makes debugging easier. */
-static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[MAX_LENGTH] )
+static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] )
{
- char paths[X264_BFRAME_MAX+2][MAX_LENGTH] = {{0}};
+ char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}};
int num_paths = X264_MIN(max_bframes+1, length);
int suffix_size, loc, path;
int best_cost = COST_MAX;
int best_path_index = 0;
- length = X264_MIN(length,MAX_LENGTH);
+ length = X264_MIN(length,X264_LOOKAHEAD_MAX);
/* Iterate over all currently possible paths and add suffixes to each one */
for( suffix_size = 0; suffix_size < num_paths; suffix_size++ )
memcpy( best_paths[length], paths[best_path_index], length );
}
-static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int bframes, int buffer )
-{
- char best_paths[MAX_LENGTH][MAX_LENGTH] = {"","P"};
- int n;
- for( n = 2; n < length-1; n++ )
- x264_slicetype_path( h, a, frames, n, bframes, buffer, best_paths );
- return strspn( best_paths[length-2], "B" );
-}
-
static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 )
{
x264_frame_t *frame = frames[p1];
return res;
}
-static void x264_slicetype_analyse( x264_t *h )
+static void x264_slicetype_analyse( x264_t *h, int keyframe )
{
x264_mb_analysis_t a;
- x264_frame_t *frames[X264_BFRAME_MAX*4+3] = { NULL, };
+ x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
int num_frames;
int keyint_limit;
- int j;
+ int i,j;
int i_mb_count = NUM_MBS;
int cost1p0, cost2p0, cost1b1, cost2p1;
int idr_frame_type;
frames[j+1] = h->frames.next[j];
keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
num_frames = X264_MIN( j, keyint_limit );
- if( num_frames == 0 )
+
+ if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
return;
x264_lowres_context_init( h, &a );
idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
- if( num_frames == 1 )
+ if( num_frames == 1 && !h->param.rc.b_mb_tree )
{
-no_b_frames:
frames[1]->i_type = X264_TYPE_P;
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
frames[1]->i_type = idr_frame_type;
return;
}
- if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
+ /* This is important psy-wise: if we have a non-scenecut keyframe,
+ * there will be significant visual artifacts if the frames just before
+ * go down in quality due to being referenced less, despite it being
+ * more RD-optimal. */
+ if( h->param.analyse.b_psy && h->param.rc.b_mb_tree )
+ num_frames = j;
+
+ char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
+ int n;
+ int num_bframes = 0;
+ int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
+ int num_analysed_frames = num_frames;
+ int reset_start;
+ if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
{
- int num_bframes;
- int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
+ frames[1]->i_type = idr_frame_type;
+ return;
+ }
+
+ if( h->param.i_bframe )
+ {
+ if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
{
- frames[1]->i_type = idr_frame_type;
- return;
+ /* Perform the frametype analysis. */
+ for( n = 2; n < num_frames-1; n++ )
+ x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
+ num_bframes = strspn( best_paths[num_frames-2], "B" );
+ /* Load the results of the analysis into the frame types. */
+ for( j = 1; j < num_frames; j++ )
+ frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
+ frames[num_frames]->i_type = X264_TYPE_P;
}
- num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes );
- assert(num_bframes < num_frames);
-
- for( j = 1; j < num_bframes+1; j++ )
+ else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
{
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
+ for( i = 0; i < num_frames-(2-!i); )
{
- frames[j]->i_type = X264_TYPE_P;
- return;
- }
- frames[j]->i_type = X264_TYPE_B;
- }
- frames[num_bframes+1]->i_type = X264_TYPE_P;
- }
- else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
- {
- cost2p1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 2, 1 );
- if( frames[2]->i_intra_mbs[2] > i_mb_count / 2 )
- goto no_b_frames;
+ cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 );
+ if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 )
+ {
+ frames[i+1]->i_type = X264_TYPE_P;
+ frames[i+2]->i_type = X264_TYPE_P;
+ i += 2;
+ continue;
+ }
- cost1b1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 1, 0 );
- cost1p0 = x264_slicetype_frame_cost( h, &a, frames, 0, 1, 1, 0 );
- cost2p0 = x264_slicetype_frame_cost( h, &a, frames, 1, 2, 2, 0 );
+ cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 );
+ cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 );
+ cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 );
- if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
- goto no_b_frames;
+ if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
+ {
+ frames[i+1]->i_type = X264_TYPE_P;
+ frames[i+2]->i_type = X264_TYPE_P;
+ i += 2;
+ continue;
+ }
- // arbitrary and untuned
- #define INTER_THRESH 300
- #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
- frames[1]->i_type = X264_TYPE_B;
+ // arbitrary and untuned
+ #define INTER_THRESH 300
+ #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
+ frames[i+1]->i_type = X264_TYPE_B;
+ frames[i+2]->i_type = X264_TYPE_P;
- for( j = 2; j <= X264_MIN( h->param.i_bframe, num_frames-1 ); j++ )
+ for( j = i+2; j <= X264_MIN( h->param.i_bframe, num_frames-2 ); j++ )
+ {
+ int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10);
+ int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 );
+
+ if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 )
+ {
+ frames[j]->i_type = X264_TYPE_P;
+ break;
+ }
+ else
+ frames[j]->i_type = X264_TYPE_B;
+ }
+ i = j;
+ }
+ frames[i+!i]->i_type = X264_TYPE_P;
+ num_bframes = 0;
+ while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B )
+ num_bframes++;
+ }
+ else
{
- int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-1), INTER_THRESH/10);
- int pcost = x264_slicetype_frame_cost( h, &a, frames, 0, j+1, j+1, 1 );
+ num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
+ for( j = 1; j < num_frames; j++ )
+ frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
+ frames[num_frames]->i_type = X264_TYPE_P;
+ }
- if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j+1] > i_mb_count/3 )
+ /* Check scenecut on the first minigop. */
+ for( j = 1; j < num_bframes+1; j++ )
+ if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
{
frames[j]->i_type = X264_TYPE_P;
+ num_analysed_frames = j;
break;
}
- else
- frames[j]->i_type = X264_TYPE_B;
- }
+
+ reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
}
else
{
- int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
- {
- frames[1]->i_type = idr_frame_type;
- return;
- }
+ for( j = 1; j < num_frames; j++ )
+ frames[j]->i_type = X264_TYPE_P;
+ reset_start = !keyframe + 1;
+ }
- for( j = 1; j < max_bframes+1; j++ )
- {
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
+ /* Perform the actual macroblock tree analysis.
+ * Don't go farther than the lookahead parameter; this helps in short GOPs. */
+ if( h->param.rc.b_mb_tree )
+ x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.rc.i_lookahead), keyframe );
+
+ /* Enforce keyframe limit. */
+ if( h->param.i_bframe )
+ for( j = 0; j <= num_bframes; j++ )
+ if( j+1 > keyint_limit )
{
- frames[j]->i_type = X264_TYPE_P;
- return;
+ if( j )
+ frames[j]->i_type = X264_TYPE_P;
+ frames[j+1]->i_type = idr_frame_type;
+ reset_start = j+2;
+ break;
}
- frames[j]->i_type = X264_TYPE_B;
- }
- frames[max_bframes+1]->i_type = X264_TYPE_P;
- }
+
+ /* Restore frametypes for all frames that haven't actually been decided yet. */
+ for( j = reset_start; j <= num_frames; j++ )
+ frames[j]->i_type = X264_TYPE_AUTO;
}
void x264_slicetype_decide( x264_t *h )
x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
}
else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
- || h->param.i_scenecut_threshold )
- x264_slicetype_analyse( h );
+ || h->param.i_scenecut_threshold
+ || h->param.rc.b_mb_tree )
+ x264_slicetype_analyse( h, 0 );
for( bframes = 0;; bframes++ )
{
frm->i_type = X264_TYPE_P;
}
- if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_B;
+ if( frm->i_type == X264_TYPE_AUTO )
+ frm->i_type = X264_TYPE_B;
+
else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
}
}
int x264_rc_analyse_slice( x264_t *h )
{
x264_mb_analysis_t a;
- x264_frame_t *frames[X264_BFRAME_MAX*4+2] = { NULL, };
+ x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, };
int p0=0, p1, b;
int cost;
if( IS_X264_TYPE_I(h->fenc->i_type) )
{
p1 = b = 0;
+ /* For MB-tree, we have to perform propagation analysis on I-frames too. */
+ if( h->param.rc.b_mb_tree )
+ {
+ h->frames.last_nonb = h->fenc;
+ x264_slicetype_analyse( h, 1 );
+ }
}
else if( X264_TYPE_P == h->fenc->i_type )
{
frames[p0] = h->fref0[0];
frames[b] = h->fenc;
- cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
+ if( h->param.rc.b_mb_tree )
+ cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b );
+ else
+ {
+ cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
- /* In AQ, use the weighted score instead. */
- if( h->param.rc.i_aq_mode )
- cost = frames[b]->i_cost_est[b-p0][p1-b];
+ /* In AQ, use the weighted score instead. */
+ if( h->param.rc.i_aq_mode )
+ cost = frames[b]->i_cost_est[b-p0][p1-b];
+ }
h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
H0( " - baseline,main,high\n" );
H0( " --preset Use a preset to select encoding settings [medium]\n" );
H0( " Overridden by user settings\n");
- H1( " - ultrafast,veryfast,fast,medium\n"
- " - slow,slower,placebo\n" );
- else H0( " - ultrafast,veryfast,fast,medium,slow,slower\n" );
+ H0( " - ultrafast,veryfast,faster,fast\n"
+ " - medium,slow,slower,placebo\n" );
H0( " --tune Tune the settings for a particular type of source\n" );
H0( " Overridden by user settings\n");
H1( " - film,animation,grain,psnr,ssim\n"
H0( " -q, --qp <integer> Set QP (0-51, 0=lossless)\n" );
H0( " -B, --bitrate <integer> Set bitrate (kbit/s)\n" );
H0( " --crf <float> Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
+ H0( " --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
H0( " --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
H0( " --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
H1( " --vbv-init <float> Initial VBV buffer occupancy [%.1f]\n", defaults->rc.f_vbv_buffer_init );
" - 2: Last pass, does not overwrite stats file\n"
" - 3: Nth pass, overwrites stats file\n" );
H0( " --stats <string> Filename for 2 pass stats [\"%s\"]\n", defaults->rc.psz_stat_out );
+ H0( " --no-mbtree Disable mb-tree ratecontrol.\n");
H0( " --qcomp <float> QP curve compression: 0.0 => CBR, 1.0 => CQP [%.2f]\n", defaults->rc.f_qcompress );
H1( " --cplxblur <float> Reduce fluctuations in QP (before curve compression) [%.1f]\n", defaults->rc.f_complexity_blur );
H1( " --qblur <float> Reduce fluctuations in QP (after curve compression) [%.1f]\n", defaults->rc.f_qblur );
" #1: RD (requires subme>=6)\n"
" #2: Trellis (requires trellis, experimental)\n",
defaults->analyse.f_psy_rd, defaults->analyse.f_psy_trellis );
+ H1( " --no-psy Disable all visual optimizations that worsen\n"
+ " both PSNR and SSIM.\n" );
H0( " --no-mixed-refs Don't decide references on a per partition basis\n" );
H1( " --no-chroma-me Ignore chroma in motion estimation\n" );
H0( " --no-8x8dct Disable adaptive spatial transform size\n" );
{ "qpmax", required_argument, NULL, 0 },
{ "qpstep", required_argument, NULL, 0 },
{ "crf", required_argument, NULL, 0 },
+ { "rc-lookahead",required_argument, NULL, 0 },
{ "ref", required_argument, NULL, 'r' },
{ "asm", required_argument, NULL, 0 },
{ "no-asm", no_argument, NULL, 0 },
{ "mvrange-thread", required_argument, NULL, 0 },
{ "subme", required_argument, NULL, 'm' },
{ "psy-rd", required_argument, NULL, 0 },
+ { "no-psy", no_argument, NULL, 0 },
+ { "psy", no_argument, NULL, 0 },
{ "mixed-refs", no_argument, NULL, 0 },
{ "no-mixed-refs", no_argument, NULL, 0 },
{ "no-chroma-me", no_argument, NULL, 0 },
{ "pass", required_argument, NULL, 'p' },
{ "stats", required_argument, NULL, 0 },
{ "qcomp", required_argument, NULL, 0 },
+ { "mbtree", no_argument, NULL, 0 },
+ { "no-mbtree", no_argument, NULL, 0 },
{ "qblur", required_argument, NULL, 0 },
{ "cplxblur", required_argument, NULL, 0 },
{ "zones", required_argument, NULL, 0 },
param->rc.i_aq_mode = 0;
param->analyse.b_mixed_references = 0;
param->analyse.i_trellis = 0;
+ param->i_bframe_adaptive = X264_B_ADAPT_NONE;
+ param->rc.b_mb_tree = 0;
}
else if( !strcasecmp( optarg, "veryfast" ) )
{
param->i_frame_reference = 1;
param->analyse.b_mixed_references = 0;
param->analyse.i_trellis = 0;
+ param->rc.b_mb_tree = 0;
}
- else if( !strcasecmp( optarg, "fast" ) )
+ else if( !strcasecmp( optarg, "faster" ) )
{
param->analyse.b_mixed_references = 0;
param->i_frame_reference = 2;
param->analyse.i_subpel_refine = 4;
+ param->rc.b_mb_tree = 0;
+ }
+ else if( !strcasecmp( optarg, "fast" ) )
+ {
+ param->i_frame_reference = 2;
+ param->analyse.i_subpel_refine = 6;
+ param->rc.i_lookahead = 30;
}
else if( !strcasecmp( optarg, "medium" ) )
{
param->i_frame_reference = 5;
param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
+ param->rc.i_lookahead = 50;
}
else if( !strcasecmp( optarg, "slower" ) )
{
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
param->analyse.inter |= X264_ANALYSE_PSUB8x8;
param->analyse.i_trellis = 2;
+ param->rc.i_lookahead = 60;
}
else if( !strcasecmp( optarg, "placebo" ) )
{
param->analyse.b_fast_pskip = 0;
param->analyse.i_trellis = 2;
param->i_bframe = 16;
+ param->rc.i_lookahead = 60;
}
else
{
}
else if( !strcasecmp( optarg, "psnr" ) )
{
- param->analyse.f_psy_rd = 0;
param->rc.i_aq_mode = X264_AQ_NONE;
+ param->analyse.b_psy = 0;
}
else if( !strcasecmp( optarg, "ssim" ) )
{
- param->analyse.f_psy_rd = 0;
param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
+ param->analyse.b_psy = 0;
}
else if( !strcasecmp( optarg, "fastdecode" ) )
{
param->i_deblocking_filter_alphac0 = -1;
param->i_deblocking_filter_beta = -1;
param->analyse.f_psy_trellis = 0.2;
- param->rc.f_ip_factor = 2.1;
param->rc.f_aq_strength = 1.3;
if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
param->analyse.inter |= X264_ANALYSE_PSUB8x8;
#include <stdarg.h>
-#define X264_BUILD 68
+#define X264_BUILD 69
/* x264_t:
* opaque handler for encoder */
int i_noise_reduction; /* adaptive pseudo-deadzone */
float f_psy_rd; /* Psy RD strength */
float f_psy_trellis; /* Psy trellis strength */
+ int b_psy; /* Toggle all psy optimizations */
/* the deadzone size that will be used in luma quantization */
int i_luma_deadzone[2]; /* {inter, intra} */
int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */
float f_aq_strength;
+ int b_mb_tree; /* Macroblock-tree ratecontrol. */
+ int i_lookahead;
/* 2pass */
int b_stat_write; /* Enable stat writing in psz_stat_out */