Merge Dylan's Google Summer of Code 2009 tree.
Detect fades and use weighted prediction to improve compression and quality.
"Blind" mode provides a small overall quality increase by using a -1 offset without doing any analysis, as described in JVT-AB033.
"Smart", the default mode, also performs fade detection and decides weights accordingly.
MB-tree takes into account the effects of "smart" analysis in lookahead, even further improving quality in fades.
If psy is on, mbtree is on, interlaced is off, and weightp is off, fade detection will still be performed.
However, it will be used to adjust quality instead of to create actual weights.
This will improve quality in fades when encoding in Baseline profile.
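For reference, every mode reduces to the same per-pixel explicit weighting (this
mirrors the opscale/opscale_noden macros in the C reference below; the standalone
names here are illustrative):
    dst = x264_clip_uint8( ((src * i_scale + (1 << (i_denom-1))) >> i_denom) + i_offset ); /* i_denom >= 1 */
    dst = x264_clip_uint8( src * i_scale + i_offset ); /* i_denom == 0, e.g. blind's scale=1, offset=-1 */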
Doesn't add support for interlaced encoding with weightp yet.
Only adds support for luma weights, not chroma weights;
the internal code for chroma weights is in place, but there's no analysis for it yet.
Baseline profile requires that weightp be off.
All weightp modes may cause minor breakage in non-compliant decoders that take shortcuts in deblocking reference frame checks.
"Smart" may cause serious breakage in non-compliant decoders that take shortcuts in handling of duplicate reference frames.
Thanks to Google for sponsoring our most successful Summer of Code yet!
param->analyse.i_chroma_qp_offset = 0;
param->analyse.b_fast_pskip = 1;
param->analyse.b_weighted_bipred = 1;
+ param->analyse.i_weighted_pred = X264_WEIGHTP_SMART;
param->analyse.b_dct_decimate = 1;
param->analyse.b_transform_8x8 = 1;
param->analyse.i_trellis = 1;
p->analyse.b_transform_8x8 = atobool(value);
OPT2("weightb", "weight-b")
p->analyse.b_weighted_bipred = atobool(value);
+ OPT("weightp")
+ p->analyse.i_weighted_pred = atoi(value);
OPT2("direct", "direct-pred")
b_error |= parse_enum( value, x264_direct_pred_names, &p->analyse.i_direct_mv_pred );
OPT("chroma-qp-offset")
p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
}
+ s += sprintf( s, " wpredp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
#define X264_THREAD_HEIGHT 24
+/* WEIGHTP_FAKE is set when mb_tree & psy are enabled, but normal weightp is disabled
+ * (such as in baseline). It checks for fades in lookahead and adjusts qp accordingly
+ * to increase quality. Defined as (-1) so that if(i_weighted_pred > 0) is true only when
+ * real weights are being used. */
+
+#define X264_WEIGHTP_FAKE (-1)
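+/* For context: the user-selectable values in x264.h are X264_WEIGHTP_NONE (0),
+ * X264_WEIGHTP_BLIND (1) and X264_WEIGHTP_SMART (2); FAKE is never accepted from
+ * user input (parameter validation clips to [0,SMART]) and is only set internally. */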
+
/****************************************************************************
* Includes
****************************************************************************/
int arg;
} ref_pic_list_order[2][16];
+ /* P-frame weighting */
+ x264_weight_t weight[16][3];
+
int i_mmco_remove_from_end;
int i_mmco_command_count;
struct /* struct for future expansion */
/* Unused frames: 0 = fenc, 1 = fdec */
x264_frame_t **unused[2];
+ /* Unused blank frames (for duplicates) */
+ x264_frame_t **blank_unused;
+
/* frames used for reference + sentinels */
x264_frame_t *reference[16+2];
uint8_t *intra_border_backup[2][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
uint8_t (*nnz_backup)[16]; /* when using cavlc + 8x8dct, the deblocker uses a modified nnz */
+ /* buffer for weighted versions of the reference frames */
+ uint8_t *p_weight_buf[16];
+
/* current value */
int i_type;
int i_partition;
/* pointer over mb of the references */
int i_fref[2];
uint8_t *p_fref[2][32][4+2]; /* last: lN, lH, lV, lHV, cU, cV */
+ uint8_t *p_fref_w[32]; /* weighted fullpel luma */
uint16_t *p_integral[2][16];
/* fref stride */
/* */
int i_direct_score[2];
int i_direct_frames[2];
+ /* num p-frames weighted */
+ int i_wpred[3];
} stat;
frame->i_frame_num = -1;
frame->i_lines_completed = -1;
frame->b_fdec = b_fdec;
+ frame->orig = frame;
/* all 4 luma planes allocated together, since the cacheline split code
* requires them to be in-phase wrt cacheline alignment. */
else
{
CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
- frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
+ frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
}
+ frame->b_duplicate = 0;
+
if( b_fdec ) /* fdec frame */
{
CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
void x264_frame_delete( x264_frame_t *frame )
{
int i, j;
- for( i = 0; i < 4; i++ )
- x264_free( frame->buffer[i] );
- for( i = 0; i < 4; i++ )
- x264_free( frame->buffer_lowres[i] );
- for( i = 0; i < X264_BFRAME_MAX+2; i++ )
- for( j = 0; j < X264_BFRAME_MAX+2; j++ )
- x264_free( frame->i_row_satds[i][j] );
- for( j = 0; j < 2; j++ )
- for( i = 0; i <= X264_BFRAME_MAX; i++ )
- {
- x264_free( frame->lowres_mvs[j][i] );
- x264_free( frame->lowres_mv_costs[j][i] );
- }
- x264_free( frame->i_propagate_cost );
- for( j = 0; j <= X264_BFRAME_MAX+1; j++ )
- for( i = 0; i <= X264_BFRAME_MAX+1; i++ )
- {
- x264_free( frame->lowres_costs[j][i] );
- x264_free( frame->lowres_inter_types[j][i] );
- }
- x264_free( frame->f_qp_offset );
- x264_free( frame->f_qp_offset_aq );
- x264_free( frame->i_inv_qscale_factor );
- x264_free( frame->i_row_bits );
- x264_free( frame->i_row_qp );
- x264_free( frame->mb_type );
- x264_free( frame->mv[0] );
- x264_free( frame->mv[1] );
- x264_free( frame->ref[0] );
- x264_free( frame->ref[1] );
- x264_pthread_mutex_destroy( &frame->mutex );
- x264_pthread_cond_destroy( &frame->cv );
+ /* Duplicate frames are blank copies of real frames (including pointers),
+ * so freeing those pointers would cause a double free later. */
+ if( !frame->b_duplicate )
+ {
+ for( i = 0; i < 4; i++ )
+ x264_free( frame->buffer[i] );
+ for( i = 0; i < 4; i++ )
+ x264_free( frame->buffer_lowres[i] );
+ for( i = 0; i < X264_BFRAME_MAX+2; i++ )
+ for( j = 0; j < X264_BFRAME_MAX+2; j++ )
+ x264_free( frame->i_row_satds[i][j] );
+ for( j = 0; j < 2; j++ )
+ for( i = 0; i <= X264_BFRAME_MAX; i++ )
+ {
+ x264_free( frame->lowres_mvs[j][i] );
+ x264_free( frame->lowres_mv_costs[j][i] );
+ }
+ x264_free( frame->i_propagate_cost );
+ for( j = 0; j <= X264_BFRAME_MAX+1; j++ )
+ for( i = 0; i <= X264_BFRAME_MAX+1; i++ )
+ {
+ x264_free( frame->lowres_costs[j][i] );
+ x264_free( frame->lowres_inter_types[j][i] );
+ }
+ x264_free( frame->f_qp_offset );
+ x264_free( frame->f_qp_offset_aq );
+ x264_free( frame->i_inv_qscale_factor );
+ x264_free( frame->i_row_bits );
+ x264_free( frame->i_row_qp );
+ x264_free( frame->mb_type );
+ x264_free( frame->mv[0] );
+ x264_free( frame->mv[1] );
+ x264_free( frame->ref[0] );
+ x264_free( frame->ref[1] );
+ x264_pthread_mutex_destroy( &frame->mutex );
+ x264_pthread_cond_destroy( &frame->cv );
+ }
x264_free( frame );
}
int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
int i4p= mb_4x4+x+y*s4x4;\
int i4q= mbn_4x4+xn+yn*s4x4;\
- if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
+ int refs_equal;\
+ if( h->mb.ref[0][i8p] < 0 || h->mb.ref[0][i8q] < 0 )\
+ refs_equal = h->mb.ref[0][i8p] == h->mb.ref[0][i8q];\
+ else if( !h->mb.b_interlaced )\
+ refs_equal = h->fref0[h->mb.ref[0][i8p]]->i_poc == h->fref0[h->mb.ref[0][i8q]]->i_poc;\
+ else\
+ refs_equal = ( h->fref0[h->mb.ref[0][i8p]>>1]->i_poc == h->fref0[h->mb.ref[0][i8q]>>1]->i_poc ) &&\
+ ( (h->mb.ref[0][i8p]&1) == (h->mb.ref[0][i8q]&1) );\
+ if((!refs_equal ||\
abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
(h->sh.i_type == SLICE_TYPE_B &&\
frame->i_reference_count = 1;
frame->b_intra_calculated = 0;
frame->b_scenecut = 1;
+
+ memset( frame->weight, 0, sizeof(frame->weight) );
+ memset( frame->f_weighted_cost_delta, 0, sizeof(frame->f_weighted_cost_delta) );
+
+ return frame;
+}
+
+void x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame )
+{
+ assert( frame->i_reference_count > 0 );
+ frame->i_reference_count--;
+ if( frame->i_reference_count == 0 )
+ x264_frame_push( h->frames.blank_unused, frame );
+}
+
+x264_frame_t *x264_frame_pop_blank_unused( x264_t *h )
+{
+ x264_frame_t *frame;
+ if( h->frames.blank_unused[0] )
+ frame = x264_frame_pop( h->frames.blank_unused );
+ else
+ frame = x264_malloc( sizeof(x264_frame_t) );
+ if( !frame )
+ return NULL;
+ frame->b_duplicate = 1;
+ frame->i_reference_count = 1;
return frame;
}
} while( !b_ok );
}
+void x264_weight_scale_plane( x264_t *h, uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride,
+ int i_width, int i_height, x264_weight_t *w )
+{
+ int x;
+ /* Weight horizontal strips of height 16. This was found to be the optimal
+ * height in terms of cache loads. */
+ while( i_height > 0 )
+ {
+ for( x = 0; x < i_width ; x += 16 )
+ w->weightfn[16>>2]( dst+x, i_dst_stride, src+x, i_src_stride, w, X264_MIN( i_height, 16 ) );
+ i_height -= 16;
+ dst += 16 * i_dst_stride;
+ src += 16 * i_src_stride;
+ }
+}
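+/* Illustrative note: weightfn is indexed by width/4, matching the
+ * {w2,w4,w8,w12,w16,w20} layout of x264_mc_weight_wtab, so weightfn[16>>2]
+ * above selects the 16-pixel-wide kernel. */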
+
void x264_frame_delete_list( x264_frame_t **list )
{
int i = 0;
+ if( !list )
+ return;
while( list[i] )
x264_frame_delete( list[i++] );
x264_free( list );
#define PADH 32
#define PADV 32
-typedef struct
+typedef struct x264_frame
{
/* */
int i_poc;
uint8_t *buffer[4];
uint8_t *buffer_lowres[4];
+ x264_weight_t weight[16][3]; /* the weights for the P frames used to encode this frame */
+ uint8_t *weighted[16]; /* plane[0] weighted of the reference frames */
+ int b_duplicate;
+ struct x264_frame *orig;
+
/* motion data */
int8_t *mb_type;
int16_t (*mv[2])[2];
uint16_t *i_propagate_cost;
uint16_t *i_inv_qscale_factor;
int b_scenecut; /* Set to zero if the frame cannot possibly be part of a real scenecut. */
+ float f_weighted_cost_delta[X264_BFRAME_MAX+2];
/* vbv */
uint8_t i_planned_type[X264_LOOKAHEAD_MAX+1];
/* threading */
int i_lines_completed; /* in pixels */
+ int i_lines_weighted; /* FIXME: this only supports weighting of one reference frame */
int i_reference_count; /* number of threads using this frame (not necessarily the number of pointers) */
x264_pthread_mutex_t mutex;
x264_pthread_cond_t cv;
void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
x264_frame_t *x264_frame_shift( x264_frame_t **list );
void x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
+void x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame );
+x264_frame_t *x264_frame_pop_blank_unused( x264_t *h );
+void x264_weight_scale_plane( x264_t *h, uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride,
+ int i_width, int i_height, x264_weight_t *w );
x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
void x264_frame_sort( x264_frame_t **list, int b_dts );
void x264_frame_delete_list( x264_frame_t **list );
h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height );
+ mvx, mvy, 4*width, 4*height, &h->sh.weight[i_ref][0] );
// chroma is offset if MCing from a field of opposite parity
if( h->mb.b_interlaced & i_ref )
h->mb.pic.p_fref[0][i_ref][4], h->mb.pic.i_stride[1],
mvx, mvy, 2*width, 2*height );
+ if( h->sh.weight[i_ref][1].weightfn )
+ h->sh.weight[i_ref][1].weightfn[width>>1]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->sh.weight[i_ref][1], height*2 );
+
h->mc.mc_chroma( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
h->mb.pic.p_fref[0][i_ref][5], h->mb.pic.i_stride[2],
mvx, mvy, 2*width, 2*height );
+
+ if( h->sh.weight[i_ref][2].weightfn )
+ h->sh.weight[i_ref][2].weightfn[width>>1]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->sh.weight[i_ref][2], height*2 );
}
static NOINLINE void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
{
h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height );
+ mvx, mvy, 4*width, 4*height, weight_none );
if( h->mb.b_interlaced & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
uint8_t *src0, *src1;
src0 = h->mc.get_ref( tmp0, &i_stride0, h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
- mvx0, mvy0, 4*width, 4*height );
+ mvx0, mvy0, 4*width, 4*height, weight_none );
src1 = h->mc.get_ref( tmp1, &i_stride1, h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
- mvx1, mvy1, 4*width, 4*height );
+ mvx1, mvy1, 4*width, 4*height, weight_none );
h->mc.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
src0, i_stride0, src1, i_stride1, weight );
for( i=0; i<2; i++ )
{
int i_refs = X264_MIN(16, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ i_refs = X264_MIN(16, i_refs + 2); //smart weights add two duplicate frames
+ else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
+ i_refs = X264_MIN(16, i_refs + 1); //blind weights add one duplicate frame
+
for( j=0; j < i_refs; j++ )
CHECKED_MALLOC( h->mb.mvr[i][j], 2 * i_mb_count * sizeof(int16_t) );
}
+ if( h->param.analyse.i_weighted_pred )
+ {
+ int i_padv = PADV << h->param.b_interlaced;
+#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
+ int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
+ int i_stride, luma_plane_size;
+ int numweightbuf;
+
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE )
+ {
+ // only need buffer for lookahead thread
+ if( !h->param.i_sync_lookahead || h == h->thread[h->param.i_threads] )
+ {
+ // Fake analysis only works on lowres
+ i_stride = ALIGN( h->sps->i_mb_width*8 + 2*PADH, align );
+ luma_plane_size = i_stride * (h->sps->i_mb_height*8+2*i_padv);
+ // Only need 1 buffer for analysis
+ numweightbuf = 1;
+ }
+ else
+ numweightbuf = 0;
+ }
+ else
+ {
+ i_stride = ALIGN( h->sps->i_mb_width*16 + 2*PADH, align );
+ luma_plane_size = i_stride * (h->sps->i_mb_height*16+2*i_padv);
+
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ // SMART needs one buffer for the weighted ref and one for the -1 offset duplicate
+ numweightbuf = 2;
+ else
+ // BLIND has only one weighted copy (the -1 offset)
+ numweightbuf = 1;
+ }
+
+ for( i = 0; i < numweightbuf; i++ )
+ CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size );
+#undef ALIGN
+ }
+
for( i=0; i<=h->param.b_interlaced; i++ )
for( j=0; j<3; j++ )
{
for( i=0; i<2; i++ )
for( j=0; j<32; j++ )
x264_free( h->mb.mvr[i][j] );
+ for( i=0; i<16; i++ )
+ x264_free( h->mb.p_weight_buf[i] );
+
if( h->param.b_cabac )
{
x264_free( h->mb.chroma_pred_mode );
{
h->mb.pic.p_fref[0][j][i==0 ? 0:i+3] = &fref[0][j >> h->mb.b_interlaced]->plane[i][ref_pix_offset[j&1]];
if( i == 0 )
+ {
for( k = 1; k < 4; k++ )
h->mb.pic.p_fref[0][j][k] = &fref[0][j >> h->mb.b_interlaced]->filtered[k][ref_pix_offset[j&1]];
+ if( h->sh.weight[j][0].weightfn )
+ h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> h->mb.b_interlaced][ref_pix_offset[j&1]];
+ else
+ h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
+ }
}
if( h->sh.i_type == SLICE_TYPE_B )
for( j = 0; j < h->mb.pic.i_fref[1]; j++ )
PIXEL_AVG_C( pixel_avg_2x4, 2, 4 )
PIXEL_AVG_C( pixel_avg_2x2, 2, 2 )
+static void x264_weight_cache( x264_t *h, x264_weight_t *w )
+{
+ w->weightfn = h->mc.weight;
+}
+#define opscale(x) dst[x] = x264_clip_uint8( ( ( src[x] * weight->i_scale + (1 << (weight->i_denom - 1)) ) >> weight->i_denom ) + weight->i_offset )
+#define opscale_noden(x) dst[x] = x264_clip_uint8( ( src[x] * weight->i_scale ) + weight->i_offset )
+static inline void mc_weight( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
+{
+ int x, y;
+ if( weight->i_denom >= 1 )
+ {
+ for( y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
+ {
+ for( x = 0; x < i_width; x++ )
+ opscale( x );
+ }
+ }
+ else
+ {
+ for( y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
+ for( x = 0; x < i_width; x++ )
+ opscale_noden( x );
+ }
+}
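+/* Worked example (values illustrative): with i_scale=63, i_denom=5, i_offset=3,
+ * a source pixel of 100 becomes clip(((100*63 + 16) >> 5) + 3) = 200, i.e. a
+ * ~2x brightening plus a small offset -- the kind of weight a fade-in produces. */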
+
+#define MC_WEIGHT_C( name, lx ) \
+ static void name( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int height ) \
+{ \
+ int x, y; \
+ if( weight->i_denom >= 1 ) \
+ { \
+ for( y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
+ for( x = 0; x < lx; x++ ) \
+ opscale( x ); \
+ } \
+ else \
+ { \
+ for( y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
+ for( x = 0; x < lx; x++ ) \
+ opscale_noden( x ); \
+ } \
+}
+
+MC_WEIGHT_C( mc_weight_w20, 20 )
+MC_WEIGHT_C( mc_weight_w16, 16 )
+MC_WEIGHT_C( mc_weight_w12, 12 )
+MC_WEIGHT_C( mc_weight_w8, 8 )
+MC_WEIGHT_C( mc_weight_w4, 4 )
+MC_WEIGHT_C( mc_weight_w2, 2 )
+
+static weight_fn_t x264_mc_weight_wtab[6] =
+{
+ mc_weight_w2,
+ mc_weight_w4,
+ mc_weight_w8,
+ mc_weight_w12,
+ mc_weight_w16,
+ mc_weight_w20,
+};
+const x264_weight_t weight_none[3] = { {{0}} };
static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
int y;
static void mc_luma( uint8_t *dst, int i_dst_stride,
uint8_t *src[4], int i_src_stride,
int mvx, int mvy,
- int i_width, int i_height )
+ int i_width, int i_height, const x264_weight_t *weight )
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
int offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
pixel_avg( dst, i_dst_stride, src1, i_src_stride,
src2, i_src_stride, i_width, i_height );
+ if( weight->weightfn )
+ mc_weight( dst, i_dst_stride, dst, i_dst_stride, weight, i_width, i_height );
}
+ else if( weight->weightfn )
+ mc_weight( dst, i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
else
- {
mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
- }
}
static uint8_t *get_ref( uint8_t *dst, int *i_dst_stride,
uint8_t *src[4], int i_src_stride,
int mvx, int mvy,
- int i_width, int i_height )
+ int i_width, int i_height, const x264_weight_t *weight )
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
int offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
src2, i_src_stride, i_width, i_height );
+ if( weight->weightfn )
+ mc_weight( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_width, i_height );
+ return dst;
+ }
+ else if( weight->weightfn )
+ {
+ mc_weight( dst, *i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
return dst;
}
else
pf->avg[PIXEL_2x4] = pixel_avg_2x4;
pf->avg[PIXEL_2x2] = pixel_avg_2x2;
+ pf->weight = x264_mc_weight_wtab;
+ pf->offsetadd = x264_mc_weight_wtab;
+ pf->offsetsub = x264_mc_weight_wtab;
+ pf->weight_cache = x264_weight_cache;
+
pf->copy_16x16_unaligned = mc_copy_w16;
pf->copy[PIXEL_16x16] = mc_copy_w16;
pf->copy[PIXEL_8x8] = mc_copy_w8;
#ifndef X264_MC_H
#define X264_MC_H
+struct x264_weight_t;
+typedef void (* weight_fn_t)( uint8_t *, int, uint8_t *,int, const struct x264_weight_t *, int );
+typedef struct x264_weight_t
+{
+ /* aligning the first member is a gcc hack to force the struct to be
+ * 16-byte aligned and to force sizeof(struct) to be a multiple of 16 */
+ ALIGNED_16( int16_t cachea[8] );
+ int16_t cacheb[8];
+ int32_t i_denom;
+ int32_t i_scale;
+ int32_t i_offset;
+ weight_fn_t *weightfn;
+} ALIGNED_16( x264_weight_t );
+
+extern const x264_weight_t weight_none[3];
+
+#define SET_WEIGHT( w, b, s, d, o )\
+{\
+ (w).i_scale = (s);\
+ (w).i_denom = (d);\
+ (w).i_offset = (o);\
+ if( b )\
+ h->mc.weight_cache( h, &w );\
+ else\
+ w.weightfn = NULL;\
+}
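+/* Example, mirroring the blind-mode duplicate set up in encoder.c:
+ *   SET_WEIGHT( w[0], 1, 1, 0, -1 ); // enabled, scale=1, denom=0, offset=-1
+ * When scale == 1<<denom, the asm weight_cache implementations swap in the
+ * cheaper offsetadd/offsetsub kernels instead of the full multiply. */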
+
/* Do the MC
* XXX: Only width = 4, 8 or 16 are valid
* width == 4 -> height == 4 or 8
{
void (*mc_luma)(uint8_t *dst, int i_dst, uint8_t **src, int i_src,
int mvx, int mvy,
- int i_width, int i_height );
+ int i_width, int i_height, const x264_weight_t *weight );
/* may round up the dimensions if they're not a power of 2 */
uint8_t* (*get_ref)(uint8_t *dst, int *i_dst, uint8_t **src, int i_src,
int mvx, int mvy,
- int i_width, int i_height );
+ int i_width, int i_height, const x264_weight_t *weight );
/* mc_chroma may write up to 2 bytes of garbage to the right of dst,
* so it must be run from left to right. */
void (*frame_init_lowres_core)( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
int src_stride, int dst_stride, int width, int height );
+ weight_fn_t *weight;
+ weight_fn_t *offsetadd;
+ weight_fn_t *offsetsub;
+ void (*weight_cache)( x264_t *, x264_weight_t * );
void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
uint16_t *inter_costs, uint16_t *inv_qscales, int len );
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;* Fiona Glaser <fiona@x264.com>
;* Laurent Aimar <fenrir@via.ecp.fr>
+;* Dylan Yudaken <dyudaken@gmail.com>
;* Min Chen <chenm001.163.com>
;*
;* This program is free software; you can redistribute it and/or modify
SECTION_RODATA 32
ch_shuffle: db 0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,0,0
+pw_1: times 8 dw 1
pw_4: times 8 dw 4
pw_8: times 8 dw 8
pw_32: times 8 dw 32
SECTION .text
;=============================================================================
-; weighted prediction
+; implicit weighted biprediction
;=============================================================================
-; implicit bipred only:
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
%ifdef ARCH_X86_64
DECLARE_REG_TMP 0,1,2,3,4,5,10,11
%endmacro
%endif
-%macro SPLATW 2
+%macro SPLATW 2-3 0
%if mmsize==16
- pshuflw %1, %2, 0
+ pshuflw %1, %2, %3*0x55
punpcklqdq %1, %1
%else
- pshufw %1, %2, 0
+ pshufw %1, %2, %3*0x55
%endif
%endmacro
AVG_WEIGHT ssse3, 8, 7
AVG_WEIGHT ssse3, 16, 7
+;=============================================================================
+; P frame explicit weighted prediction
+;=============================================================================
+
+%macro WEIGHT_START 1
+ mova m3, [r4]
+ mova m6, [r4+16]
+ movd m5, [r4+32]
+ pxor m2, m2
+%if (%1 == 20 || %1 == 12) && mmsize == 16
+ movdq2q mm3, xmm3
+ movdq2q mm4, xmm4
+ movdq2q mm5, xmm5
+ movdq2q mm6, xmm6
+ pxor mm2, mm2
+%endif
+%endmacro
+
+%macro WEIGHT_START_SSSE3 1
+ mova m3, [r4]
+ mova m4, [r4+16]
+ pxor m2, m2
+%if ( %1 == 20 || %1 == 12 )
+ movdq2q mm3, xmm3
+ movdq2q mm4, xmm4
+ pxor mm2, mm2
+%endif
+%endmacro
+
+;; macro to weight mmsize bytes taking half from %1 and half from %2
+%macro WEIGHT 2 ; (src1,src2)
+ movh m0, [%1]
+ movh m1, [%2]
+ punpcklbw m0, m2 ;setup
+ punpcklbw m1, m2 ;setup
+ pmullw m0, m3 ;scale
+ pmullw m1, m3 ;scale
+ paddsw m0, m6 ;1<<(denom-1)+(offset<<denom)
+ paddsw m1, m6 ;1<<(denom-1)+(offset<<denom)
+ psraw m0, m5 ;denom
+ psraw m1, m5 ;denom
+%endmacro
+
+%macro WEIGHT_SSSE3 2
+ movh m0, [%1]
+ movh m1, [%2]
+ punpcklbw m0, m2
+ punpcklbw m1, m2
+ psllw m0, 7
+ psllw m1, 7
+ pmulhrsw m0, m3
+ pmulhrsw m1, m3
+ paddw m0, m4
+ paddw m1, m4
+%endmacro
+
+%macro WEIGHT_SAVE_ROW 3 ;(src,dst,width)
+%if %3 == 16
+ mova [%2], %1
+%elif %3 == 8
+ movq [%2], %1
+%else
+ movd [%2], %1 ; width 2 can write garbage for last 2 bytes
+%endif
+%endmacro
+
+%macro WEIGHT_ROW 3 ; (src,dst,width)
+ ;; load pixels and apply the weight
+ WEIGHT %1, (%1+(mmsize/2))
+ packuswb m0, m1 ;put bytes into m0
+ WEIGHT_SAVE_ROW m0, %2, %3
+%endmacro
+
+%macro WEIGHT_SAVE_COL 2 ;(dst,size)
+%if %2 == 8
+ packuswb m0, m1
+ movq [%1], m0
+ movhps [%1+r1], m0
+%else
+ packuswb m0, m0
+ packuswb m1, m1
+ movd [%1], m0 ; width 2 can write garbage for last 2 bytes
+ movd [%1+r1], m1
+%endif
+%endmacro
+
+%macro WEIGHT_COL 3 ; (src,dst,width)
+%if %3 <= 4 && mmsize == 16
+ INIT_MMX
+ ;; load pixels and apply the weight
+ WEIGHT %1, (%1+r3)
+ WEIGHT_SAVE_COL %2, %3
+ INIT_XMM
+%else
+ WEIGHT %1, (%1+r3)
+ WEIGHT_SAVE_COL %2, %3
+%endif
+
+%endmacro
+
+%macro WEIGHT_TWO_ROW 3 ; (src,dst,width)
+%assign x 0
+%rep %3
+%if (%3-x) >= mmsize
+ WEIGHT_ROW (%1+x), (%2+x), mmsize ; weight 1 mmsize
+ WEIGHT_ROW (%1+r3+x), (%2+r1+x), mmsize ; weight 1 mmsize
+ %assign x (x+mmsize)
+%else
+ WEIGHT_COL (%1+x),(%2+x),(%3-x)
+ %exitrep
+%endif
+%if x >= %3
+ %exitrep
+%endif
+%endrep
+%endmacro
+
+
+;void x264_mc_weight_wX( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *weight, int h )
+
+%ifdef ARCH_X86_64
+%define NUMREGS 6
+%define LOAD_HEIGHT
+%define HEIGHT_REG r5d
+%else
+%define NUMREGS 5
+%define LOAD_HEIGHT mov r4d, r5m
+%define HEIGHT_REG r4d
+%endif
+
+%macro WEIGHTER 2
+ cglobal x264_mc_weight_w%1_%2, NUMREGS, NUMREGS, 7
+ WEIGHT_START %1
+ LOAD_HEIGHT
+.loop:
+ WEIGHT_TWO_ROW r2, r0, %1
+ lea r0, [r0+r1*2]
+ lea r2, [r2+r3*2]
+ sub HEIGHT_REG, 2
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX
+WEIGHTER 4, mmxext
+WEIGHTER 8, mmxext
+WEIGHTER 12, mmxext
+WEIGHTER 16, mmxext
+WEIGHTER 20, mmxext
+INIT_XMM
+WEIGHTER 8, sse2
+WEIGHTER 16, sse2
+WEIGHTER 20, sse2
+%define WEIGHT WEIGHT_SSSE3
+%define WEIGHT_START WEIGHT_START_SSSE3
+INIT_MMX
+WEIGHTER 4, ssse3
+INIT_XMM
+WEIGHTER 8, ssse3
+WEIGHTER 16, ssse3
+WEIGHTER 20, ssse3
+
+%macro OFFSET_OP 7
+ mov%6 m0, [%1]
+ mov%6 m1, [%2]
+ p%5usb m0, m2
+ p%5usb m1, m2
+ mov%7 [%3], m0
+ mov%7 [%4], m1
+%endmacro
+
+%macro OFFSET_TWO_ROW 4
+%assign x 0
+%rep %3
+%if (%3-x) >= mmsize
+ OFFSET_OP (%1+x), (%1+x+r3), (%2+x), (%2+x+r1), %4, u, a
+ %assign x (x+mmsize)
+%else
+ OFFSET_OP (%1+x),(%1+x+r3), (%2+x), (%2+x+r1), %4, d, d
+ %exitrep
+%endif
+%if x >= %3
+ %exitrep
+%endif
+%endrep
+%endmacro
+
+;void x264_mc_offset_wX( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, const x264_weight_t *w, int h )
+%macro OFFSET 3
+ cglobal x264_mc_offset%3_w%1_%2, NUMREGS, NUMREGS
+ mova m2, [r4]
+ LOAD_HEIGHT
+.loop:
+ OFFSET_TWO_ROW r2, r0, %1, %3
+ lea r0, [r0+r1*2]
+ lea r2, [r2+r3*2]
+ sub HEIGHT_REG, 2
+ jg .loop
+ REP_RET
+%endmacro
+
+%macro OFFSETPN 2
+ OFFSET %1, %2, add
+ OFFSET %1, %2, sub
+%endmacro
+INIT_MMX
+OFFSETPN 4, mmxext
+OFFSETPN 8, mmxext
+OFFSETPN 12, mmxext
+OFFSETPN 16, mmxext
+OFFSETPN 20, mmxext
+INIT_XMM
+OFFSETPN 12, sse2
+OFFSETPN 16, sse2
+OFFSETPN 20, sse2
+%undef LOAD_HEIGHT
+%undef HEIGHT_REG
+%undef NUMREGS
+
;=============================================================================
DECL_SUF( x264_pixel_avg_4x8, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
DECL_SUF( x264_pixel_avg_4x4, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
DECL_SUF( x264_pixel_avg_4x2, ( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ))
+
+#define MC_WEIGHT(w,type) \
+ extern void x264_mc_weight_w##w##_##type( uint8_t *,int, uint8_t *,int, const x264_weight_t *,int );
+
+#define MC_WEIGHT_OFFSET(w,type) \
+ extern void x264_mc_offsetadd_w##w##_##type( uint8_t *,int, uint8_t *,int, const x264_weight_t *,int ); \
+ extern void x264_mc_offsetsub_w##w##_##type( uint8_t *,int, uint8_t *,int, const x264_weight_t *,int ); \
+ MC_WEIGHT(w,type)
+
+MC_WEIGHT_OFFSET( 4, mmxext )
+MC_WEIGHT_OFFSET( 8, mmxext )
+MC_WEIGHT_OFFSET( 12, mmxext )
+MC_WEIGHT_OFFSET( 16, mmxext )
+MC_WEIGHT_OFFSET( 20, mmxext )
+MC_WEIGHT_OFFSET( 12, sse2 )
+MC_WEIGHT_OFFSET( 16, sse2 )
+MC_WEIGHT_OFFSET( 20, sse2 )
+MC_WEIGHT( 8, sse2 )
+MC_WEIGHT( 4, ssse3 )
+MC_WEIGHT( 8, ssse3 )
+MC_WEIGHT( 12, ssse3 )
+MC_WEIGHT( 16, ssse3 )
+MC_WEIGHT( 20, ssse3 )
+#undef MC_WEIGHT_OFFSET
+#undef MC_WEIGHT
+
extern void x264_mc_copy_w4_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w8_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_mmx( uint8_t *, int, uint8_t *, int, int );
MC_COPY_WTAB(mmx,mmx,mmx,mmx)
MC_COPY_WTAB(sse2,mmx,mmx,sse2)
+#define MC_WEIGHT_WTAB(function, instr, name1, name2, w12version)\
+ static void (* x264_mc_##function##_wtab_##instr[6])( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int ) =\
+{\
+ x264_mc_##function##_w4_##name1,\
+ x264_mc_##function##_w4_##name1,\
+ x264_mc_##function##_w8_##name2,\
+ x264_mc_##function##_w##w12version##_##instr,\
+ x264_mc_##function##_w16_##instr,\
+ x264_mc_##function##_w20_##instr,\
+};
+
+MC_WEIGHT_WTAB(weight,mmxext,mmxext,mmxext,12)
+MC_WEIGHT_WTAB(offsetadd,mmxext,mmxext,mmxext,12)
+MC_WEIGHT_WTAB(offsetsub,mmxext,mmxext,mmxext,12)
+MC_WEIGHT_WTAB(weight,sse2,mmxext,sse2,16)
+MC_WEIGHT_WTAB(offsetadd,sse2,mmxext,mmxext,16)
+MC_WEIGHT_WTAB(offsetsub,sse2,mmxext,mmxext,16)
+MC_WEIGHT_WTAB(weight,ssse3,ssse3,ssse3,16)
+
+static void x264_weight_cache_mmxext( x264_t *h, x264_weight_t *w )
+{
+ int i;
+ int16_t den1;
+
+ if( w->i_scale == 1<<w->i_denom )
+ {
+ if( w->i_offset < 0 )
+ w->weightfn = h->mc.offsetsub;
+ else
+ w->weightfn = h->mc.offsetadd;
+ memset( w->cachea, abs(w->i_offset), sizeof(w->cachea) );
+ return;
+ }
+ w->weightfn = h->mc.weight;
+ den1 = ( 1 << ( w->i_denom - 1 ) ) | ( w->i_offset << w->i_denom );
+ for( i = 0; i < 8; i++ )
+ {
+ w->cachea[i] = w->i_scale;
+ w->cacheb[i] = den1;
+ }
+}
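+/* den1 packs rounding and offset into one constant: (1<<(denom-1)) | (offset<<denom)
+ * equals (1<<(denom-1)) + (offset<<denom) since the terms occupy disjoint bits, so
+ * the asm's (src*scale + den1) >> denom matches the C opscale's
+ * ((src*scale + (1<<(denom-1))) >> denom) + offset (ignoring saturation). */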
+
+static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w )
+{
+ int i, den1;
+ if( w->i_scale == 1<<w->i_denom )
+ {
+ if( w->i_offset < 0 )
+ w->weightfn = h->mc.offsetsub;
+ else
+ w->weightfn = h->mc.offsetadd;
+
+ memset( w->cachea, abs( w->i_offset ), sizeof(w->cachea) );
+ return;
+ }
+ w->weightfn = h->mc.weight;
+ den1 = w->i_scale << ( 8 - w->i_denom );
+ for( i = 0; i < 8; i++ )
+ {
+ w->cachea[i] = den1;
+ w->cacheb[i] = w->i_offset;
+ }
+}
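+/* ssse3 path, for reference: pixels are pre-shifted left by 7 and
+ * den1 = scale << (8-denom), so pmulhrsw computes
+ * ((src<<7) * (scale<<(8-denom)) + (1<<14)) >> 15 == (src*scale + (1<<(denom-1))) >> denom,
+ * the same rounded scaling as the C code, with the offset added afterwards. */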
+
static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
static void mc_luma_##name( uint8_t *dst, int i_dst_stride,\
uint8_t *src[4], int i_src_stride,\
int mvx, int mvy,\
- int i_width, int i_height )\
+ int i_width, int i_height, const x264_weight_t *weight )\
{\
int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
x264_pixel_avg_wtab_##instr1[i_width>>2](\
dst, i_dst_stride, src1, i_src_stride,\
src2, i_height );\
+ if( weight->weightfn )\
+ weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );\
}\
+ else if( weight->weightfn )\
+ weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );\
else\
- {\
- x264_mc_copy_wtab_##instr2[i_width>>2](\
- dst, i_dst_stride, src1, i_src_stride, i_height );\
- }\
+ x264_mc_copy_wtab_##instr2[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );\
}
MC_LUMA(mmxext,mmxext,mmx)
static uint8_t *get_ref_##name( uint8_t *dst, int *i_dst_stride,\
uint8_t *src[4], int i_src_stride,\
int mvx, int mvy,\
- int i_width, int i_height )\
+ int i_width, int i_height, const x264_weight_t *weight )\
{\
int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
x264_pixel_avg_wtab_##name[i_width>>2](\
dst, *i_dst_stride, src1, i_src_stride,\
src2, i_height );\
+ if( weight->weightfn ) \
+ weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height ); \
+ return dst;\
+ }\
+ else if( weight->weightfn ) \
+ {\
+ weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );\
return dst;\
}\
else\
pf->get_ref = get_ref_mmxext;
pf->mc_chroma = x264_mc_chroma_mmxext;
+ pf->weight = x264_mc_weight_wtab_mmxext;
+ pf->offsetadd = x264_mc_offsetadd_wtab_mmxext;
+ pf->offsetsub = x264_mc_offsetsub_wtab_mmxext;
+ pf->weight_cache = x264_weight_cache_mmxext;
+
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmxext;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_mmxext;
pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_mmxext;
if( cpu&X264_CPU_SSE2_IS_SLOW )
return;
+ pf->weight = x264_mc_weight_wtab_sse2;
+ pf->offsetadd = x264_mc_offsetadd_wtab_sse2;
+ pf->offsetsub = x264_mc_offsetsub_wtab_sse2;
+
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2;
pf->mc_chroma = x264_mc_chroma_ssse3_cache64;
pf->mc_luma = mc_luma_cache64_ssse3;
pf->get_ref = get_ref_cache64_ssse3;
+
+ /* ssse3 weight is slower on Nehalem, so only assign here. */
+ pf->weight_cache = x264_weight_cache_ssse3;
+ pf->weight = x264_mc_weight_wtab_ssse3;
}
if( cpu&X264_CPU_SHUFFLE_IS_FAST )
int i_ref = i ? h->i_ref1 : h->i_ref0;
for( j=0; j<i_ref; j++ )
{
- x264_frame_cond_wait( fref[j], thresh );
+ x264_frame_cond_wait( fref[j]->orig, thresh );
+ fref[j]->i_lines_completed = fref[j]->orig->i_lines_completed;
thread_mvy_range = X264_MIN( thread_mvy_range, fref[j]->i_lines_completed - pix_y );
}
}
+
if( h->param.b_deterministic )
thread_mvy_range = h->param.analyse.i_mv_range_thread;
if( h->mb.b_interlaced )
thread_mvy_range >>= 1;
+
+ for( j=0; j<h->i_ref0; j++ )
+ {
+ if( h->sh.weight[j][0].weightfn )
+ {
+ x264_frame_t *frame = h->fref0[j];
+ int width = frame->i_width[0] + 2*PADH;
+ int i_padv = PADV << h->param.b_interlaced;
+ int offset, height;
+ uint8_t *src = frame->filtered[0] - frame->i_stride[0]*i_padv - PADH;
+ int k;
+ height = X264_MIN( 16 + thread_mvy_range + pix_y + i_padv, h->fref0[j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
+ offset = h->fenc->i_lines_weighted*frame->i_stride[0];
+ h->fenc->i_lines_weighted += height;
+ if( height )
+ {
+ for( k = j; k < h->i_ref0; k++ )
+ if( h->sh.weight[k][0].weightfn )
+ {
+ uint8_t *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
+ x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
+ src + offset, frame->i_stride[0],
+ width, height, &h->sh.weight[k][0] );
+ }
+ }
+ break;
+ }
+ }
}
h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
}
#define LOAD_FENC( m, src, xoff, yoff) \
+ (m)->p_cost_mv = a->p_cost_mv; \
(m)->i_stride[0] = h->mb.pic.i_stride[0]; \
(m)->i_stride[1] = h->mb.pic.i_stride[1]; \
(m)->p_fenc[0] = &(src)[0][(xoff)+(yoff)*FENC_STRIDE]; \
(m)->p_fenc[2] = &(src)[2][((xoff)>>1)+((yoff)>>1)*FENC_STRIDE];
#define LOAD_HPELS(m, src, list, ref, xoff, yoff) \
- (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
+ (m)->p_fref_w = (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->p_fref[4] = &(src)[4][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
(m)->p_fref[5] = &(src)[5][((xoff)>>1)+((yoff)>>1)*(m)->i_stride[1]]; \
- (m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]];
+ (m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]]; \
+ (m)->weight = weight_none; \
+ (m)->i_ref = ref;
+
+#define LOAD_WPELS(m, src, list, ref, xoff, yoff) \
+ (m)->p_fref_w = &(src)[(xoff)+(yoff)*(m)->i_stride[0]]; \
+ (m)->weight = h->sh.weight[ref];
#define REF_COST(list, ref) \
(a->p_cost_ref##list[ref])
/* 16x16 Search on all ref frame */
m.i_pixel = PIXEL_16x16;
- m.p_cost_mv = a->p_cost_mv;
LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
a->l0.me16x16.cost = INT_MAX;
const int i_ref_cost = REF_COST( 0, i_ref );
i_halfpel_thresh -= i_ref_cost;
m.i_ref_cost = i_ref_cost;
- m.i_ref = i_ref;
/* search with ref */
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 0 );
+ LOAD_WPELS( &m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 0, 0 );
+
x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh );
const int y8 = i/2;
m.i_pixel = PIXEL_8x8;
- m.p_cost_mv = a->p_cost_mv;
LOAD_FENC( &m, p_fenc, 8*x8, 8*y8 );
l0m->cost = INT_MAX;
const int i_ref_cost = REF_COST( 0, i_ref );
i_halfpel_thresh -= i_ref_cost;
m.i_ref_cost = i_ref_cost;
- m.i_ref = i_ref;
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*x8, 8*y8 );
+ LOAD_WPELS( &m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 8*x8, 8*y8 );
+
x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );
x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );
x264_me_search_ref( h, &m, a->l0.mvc[i_ref], i+1, p_halfpel_thresh );
{
const int i_ref = a->l0.me16x16.i_ref;
const int i_ref_cost = h->param.b_cabac || i_ref ? REF_COST( 0, i_ref ) : 0;
- uint8_t **p_fref = h->mb.pic.p_fref[0][i_ref];
uint8_t **p_fenc = h->mb.pic.p_fenc;
int i_mvc;
int16_t (*mvc)[2] = a->l0.mvc[i_ref];
const int y8 = i/2;
m->i_pixel = PIXEL_8x8;
- m->p_cost_mv = a->p_cost_mv;
m->i_ref_cost = i_ref_cost;
- m->i_ref = i_ref;
LOAD_FENC( m, p_fenc, 8*x8, 8*y8 );
- LOAD_HPELS( m, p_fref, 0, i_ref, 8*x8, 8*y8 );
+ LOAD_HPELS( m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*x8, 8*y8 );
+ LOAD_WPELS( m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 8*x8, 8*y8 );
+
x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
x264_me_search( h, m, mvc, i_mvc );
const int i_ref8s = ( ref8[0] == ref8[1] ) ? 1 : 2;
m.i_pixel = PIXEL_16x8;
- m.p_cost_mv = a->p_cost_mv;
LOAD_FENC( &m, p_fenc, 0, 8*i );
l0m->cost = INT_MAX;
const int i_ref = ref8[j];
const int i_ref_cost = REF_COST( 0, i_ref );
m.i_ref_cost = i_ref_cost;
- m.i_ref = i_ref;
/* if we skipped the 16x16 predictor, we wouldn't have to copy anything... */
*(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
*(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][2*i+2];
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 8*i );
+ LOAD_WPELS( &m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 0, 8*i );
+
x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, i_ref );
x264_mb_predict_mv( h, 0, 8*i, 4, m.mvp );
x264_me_search( h, &m, mvc, 3 );
const int i_ref8s = ( ref8[0] == ref8[1] ) ? 1 : 2;
m.i_pixel = PIXEL_8x16;
- m.p_cost_mv = a->p_cost_mv;
LOAD_FENC( &m, p_fenc, 8*i, 0 );
l0m->cost = INT_MAX;
const int i_ref = ref8[j];
const int i_ref_cost = REF_COST( 0, i_ref );
m.i_ref_cost = i_ref_cost;
- m.i_ref = i_ref;
*(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
*(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][i+1];
*(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][i+3];
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*i, 0 );
+ LOAD_WPELS( &m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 8*i, 0 );
+
x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, i_ref );
x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );
x264_me_search( h, &m, mvc, 3 );
const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
const int i_ref = a->l0.me8x8[i8x8].i_ref;
const int mvy_offset = h->mb.b_interlaced & i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ x264_weight_t *weight = h->sh.weight[i_ref];
#define CHROMA4x4MC( width, height, me, x, y ) \
h->mc.mc_chroma( &pix1[x+y*16], 16, &p_fref[4][or+x+y*i_stride], i_stride, (me).mv[0], (me).mv[1]+mvy_offset, width, height ); \
- h->mc.mc_chroma( &pix2[x+y*16], 16, &p_fref[5][or+x+y*i_stride], i_stride, (me).mv[0], (me).mv[1]+mvy_offset, width, height );
+ if( weight[1].weightfn ) \
+ weight[1].weightfn[width>>2]( &pix1[x+y*16], 16, &pix1[x+y*16], 16, &weight[1], height ); \
+ h->mc.mc_chroma( &pix2[x+y*16], 16, &p_fref[5][or+x+y*i_stride], i_stride, (me).mv[0], (me).mv[1]+mvy_offset, width, height ); \
+ if( weight[2].weightfn ) \
+ weight[2].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height );
+
if( pixel == PIXEL_4x4 )
{
x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
m->i_pixel = PIXEL_4x4;
- m->p_cost_mv = a->p_cost_mv;
LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
LOAD_HPELS( m, p_fref, 0, i_ref, 4*x4, 4*y4 );
+ LOAD_WPELS( m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 4*x4, 4*y4 );
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
m->i_pixel = PIXEL_8x4;
- m->p_cost_mv = a->p_cost_mv;
LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
LOAD_HPELS( m, p_fref, 0, i_ref, 4*x4, 4*y4 );
+ LOAD_WPELS( m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 4*x4, 4*y4 );
x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
m->i_pixel = PIXEL_4x8;
- m->p_cost_mv = a->p_cost_mv;
LOAD_FENC( m, p_fenc, 4*x4, 4*y4 );
LOAD_HPELS( m, p_fref, 0, i_ref, 4*x4, 4*y4 );
+ LOAD_WPELS( m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 4*x4, 4*y4 );
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
/* 16x16 Search on all ref frame */
m.i_pixel = PIXEL_16x16;
- m.p_cost_mv = a->p_cost_mv;
+ m.weight = weight_none;
+
LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
/* ME for List 0 */
/* save mv for predicting neighbors */
*(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
}
+ a->l0.me16x16.i_ref = a->l0.i_ref;
+
/* subtract ref cost, so we don't have to add it for the other MB types */
a->l0.me16x16.cost -= REF_COST( 0, a->l0.i_ref );
/* save mv for predicting neighbors */
*(uint32_t*)h->mb.mvr[1][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
}
+ a->l1.me16x16.i_ref = a->l1.i_ref;
+
/* subtract ref cost, so we don't have to add it for the other MB types */
a->l1.me16x16.cost -= REF_COST( 1, a->l1.i_ref );
/* get cost of BI mode */
src0 = h->mc.get_ref( pix0, &stride0,
- h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
- a->l0.me16x16.mv[0], a->l0.me16x16.mv[1], 16, 16 );
+ h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
+ a->l0.me16x16.mv[0], a->l0.me16x16.mv[1], 16, 16, weight_none );
src1 = h->mc.get_ref( pix1, &stride1,
- h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
- a->l1.me16x16.mv[0], a->l1.me16x16.mv[1], 16, 16 );
+ h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
+ a->l1.me16x16.mv[0], a->l1.me16x16.mv[1], 16, 16, weight_none );
h->mc.avg[PIXEL_16x16]( pix0, 16, src0, stride0, src1, stride1, h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );
x264_me_t *m = &lX->me8x8[i];
m->i_pixel = PIXEL_8x8;
- m->p_cost_mv = a->p_cost_mv;
LOAD_FENC( m, h->mb.pic.p_fenc, 8*x8, 8*y8 );
LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*x8, 8*y8 );
/* BI mode */
src[l] = h->mc.get_ref( pix[l], &stride[l], m->p_fref, m->i_stride[0],
- m->mv[0], m->mv[1], 8, 8 );
+ m->mv[0], m->mv[1], 8, 8, weight_none );
i_part_cost_bi += m->cost_mv;
/* FIXME: ref cost */
}
x264_me_t *m = &lX->me16x8[i];
m->i_pixel = PIXEL_16x8;
- m->p_cost_mv = a->p_cost_mv;
LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i );
/* BI mode */
src[l] = h->mc.get_ref( pix[l], &stride[l], m->p_fref, m->i_stride[0],
- m->mv[0], m->mv[1], 16, 8 );
+ m->mv[0], m->mv[1], 16, 8, weight_none );
/* FIXME: ref cost */
i_part_cost_bi += m->cost_mv;
}
x264_me_t *m = &lX->me8x16[i];
m->i_pixel = PIXEL_8x16;
- m->p_cost_mv = a->p_cost_mv;
LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 );
/* BI mode */
src[l] = h->mc.get_ref( pix[l], &stride[l], m->p_fref, m->i_stride[0],
- m->mv[0], m->mv[1], 8, 16 );
+ m->mv[0], m->mv[1], 8, 16, weight_none );
/* FIXME: ref cost */
i_part_cost_bi += m->cost_mv;
}
void x264_slicetype_analyse( x264_t *h, int keyframe );
+int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
+void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lowres, int b_lookahead );
+
int x264_lookahead_init( x264_t *h, int i_slicetype_length );
int x264_lookahead_is_empty( x264_t *h );
void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame );
}
}
- if( ( sh->pps->b_weighted_pred && ( sh->i_type == SLICE_TYPE_P || sh->i_type == SLICE_TYPE_SP ) ) ||
- ( sh->pps->b_weighted_bipred == 1 && sh->i_type == SLICE_TYPE_B ) )
+ if( sh->pps->b_weighted_pred && ( sh->i_type == SLICE_TYPE_P || sh->i_type == SLICE_TYPE_SP ) )
{
- /* FIXME */
+ /* pred_weight_table() */
+ bs_write_ue( s, sh->weight[0][0].i_denom );
+ bs_write_ue( s, sh->weight[0][1].i_denom );
+ for( i = 0; i < sh->i_num_ref_idx_l0_active; i++ )
+ {
+ int luma_weight_l0_flag = !!sh->weight[i][0].weightfn;
+ int chroma_weight_l0_flag = !!sh->weight[i][1].weightfn || !!sh->weight[i][2].weightfn;
+ bs_write1( s, luma_weight_l0_flag );
+ if( luma_weight_l0_flag )
+ {
+ bs_write_se( s, sh->weight[i][0].i_scale );
+ bs_write_se( s, sh->weight[i][0].i_offset );
+ }
+ bs_write1( s, chroma_weight_l0_flag );
+ if( chroma_weight_l0_flag )
+ {
+ int j;
+ for( j = 1; j < 3; j++ )
+ {
+ bs_write_se( s, sh->weight[i][j].i_scale );
+ bs_write_se( s, sh->weight[i][j].i_offset );
+ }
+ }
+ }
+ }
+ else if( sh->pps->b_weighted_bipred == 1 && sh->i_type == SLICE_TYPE_B )
+ {
+ /* TODO */
}
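+ /* The fields written above are the H.264 pred_weight_table() syntax elements:
+ * luma_log2_weight_denom, chroma_log2_weight_denom, then per ref
+ * luma_weight_l0_flag, luma_weight_l0[i], luma_offset_l0[i] and the chroma
+ * equivalents, all coded as ue/se as in the spec. */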
if( i_nal_ref_idc != 0 )
x264_log( h, X264_LOG_WARNING, "interlace + direct=temporal is not implemented\n" );
h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
}
+ if( h->param.analyse.i_weighted_pred > 0 )
+ {
+ x264_log( h, X264_LOG_WARNING, "interlace + weightp is not implemented\n" );
+ h->param.analyse.i_weighted_pred = X264_WEIGHTP_NONE;
+ }
}
/* Detect default ffmpeg settings and terminate with an error. */
h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 512 >> h->param.b_interlaced);
}
+ h->param.analyse.i_weighted_pred = x264_clip3( h->param.analyse.i_weighted_pred, 0, X264_WEIGHTP_SMART );
+ if( !h->param.analyse.i_weighted_pred && h->param.rc.b_mb_tree && h->param.analyse.b_psy && !h->param.b_interlaced )
+ h->param.analyse.i_weighted_pred = X264_WEIGHTP_FAKE;
+
if( h->param.i_threads > 1 )
{
int r = h->param.analyse.i_mv_range_thread;
CHECKED_MALLOCZERO( h->frames.unused[1], (h->param.i_threads + 20) * sizeof(x264_frame_t *) );
CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
+ h->param.i_threads + 3) * sizeof(x264_frame_t *) );
-
+ if( h->param.analyse.i_weighted_pred > 0 )
+ CHECKED_MALLOCZERO( h->frames.blank_unused, h->param.i_threads * 4 * sizeof(x264_frame_t *) );
h->i_ref0 = 0;
h->i_ref1 = 0;
}
}
+/* return -1 on failure, else return the index of the new reference frame */
+int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w )
+{
+ int i = h->i_ref0;
+ int j;
+ x264_frame_t *newframe;
+ if( i <= 1 ) /* zero or one refs: nowhere to insert a duplicate */
+ return -1;
+
+ /* Find a place to insert the duplicate in the reference list. */
+ for( j = 0; j < i; j++ )
+ if( h->fref0[i_ref]->i_frame != h->fref0[j]->i_frame )
+ {
+ /* found a place, after j, make sure there is not already a duplicate there */
+ if( j == i-1 || ( h->fref0[j+1] && h->fref0[i_ref]->i_frame != h->fref0[j+1]->i_frame ) )
+ break;
+ }
+
+ if( j == i ) /* No room in the reference list for the duplicate. */
+ return -1;
+ j++;
+
+ newframe = x264_frame_pop_blank_unused( h );
+
+ //FIXME: probably don't need to copy everything
+ *newframe = *h->fref0[i_ref];
+ newframe->i_reference_count = 1;
+ newframe->orig = h->fref0[i_ref];
+ newframe->b_duplicate = 1;
+ memcpy( h->fenc->weight[j], w, sizeof(h->fenc->weight[j]) );
+
+ /* shift the frames to make space for the dupe. */
+ h->b_ref_reorder[0] = 1;
+ if( h->i_ref0 < 16 )
+ ++h->i_ref0;
+ h->fref0[15] = NULL;
+ x264_frame_unshift( &h->fref0[j], newframe );
+
+ return j;
+}
+
+static void x264_weighted_pred_init( x264_t *h )
+{
+ int i_ref;
+ int i;
+
+ /* no analysis yet: default to the unweighted reference planes and clear all weights */
+ for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
+ h->fenc->weighted[i_ref] = h->fref0[i_ref]->filtered[0];
+
+ // FIXME: This only supports weighting of one reference frame
+ // and duplicates of that frame.
+ h->fenc->i_lines_weighted = 0;
+
+ for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
+ for( i = 0; i < 3; i++ )
+ h->sh.weight[i_ref][i].weightfn = NULL;
+
+ if( h->sh.i_type != SLICE_TYPE_P || h->param.analyse.i_weighted_pred <= 0 )
+ return;
+
+ int i_padv = PADV << h->param.b_interlaced;
+ int denom = -1;
+ int weightluma = 0;
+ int buffer_next = 0;
+ int j;
+ //FIXME: when chroma support is added, move this into loop
+ h->sh.weight[0][1].weightfn = h->sh.weight[0][2].weightfn = NULL;
+ h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
+ for( j = 0; j < h->i_ref0; j++ )
+ {
+ if( h->fenc->weight[j][0].weightfn )
+ {
+ h->sh.weight[j][0] = h->fenc->weight[j][0];
+ // if weight is useless, don't write it to stream
+ if( h->sh.weight[j][0].i_scale == 1<<h->sh.weight[j][0].i_denom && h->sh.weight[j][0].i_offset == 0 )
+ h->sh.weight[j][0].weightfn = NULL;
+ else
+ {
+ if( !weightluma )
+ {
+ weightluma = 1;
+ h->sh.weight[0][0].i_denom = denom = h->sh.weight[j][0].i_denom;
+ }
+ assert( h->sh.weight[j][0].i_denom == denom );
+ h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] +
+ h->fenc->i_stride[0] * i_padv + PADH;
+ }
+ }
+
+ // scale the full-resolution frame now; with threads > 1, rows are weighted lazily during encode
+ if( h->sh.weight[j][0].weightfn && h->param.i_threads == 1 )
+ {
+ uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
+ uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
+ int stride = h->fenc->i_stride[0];
+ int width = h->fenc->i_width[0] + PADH*2;
+ int height = h->fenc->i_lines[0] + i_padv*2;
+ x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
+ h->fenc->i_lines_weighted = height;
+ }
+ }
+ if( !weightluma )
+ h->sh.weight[0][0].i_denom = 0;
+}
+
static inline void x264_reference_build_list( x264_t *h, int i_poc )
{
int i;
h->i_ref1 = X264_MIN( h->i_ref1, h->frames.i_max_ref1 );
h->i_ref0 = X264_MIN( h->i_ref0, h->frames.i_max_ref0 );
h->i_ref0 = X264_MIN( h->i_ref0, h->param.i_frame_reference ); // if reconfig() has lowered the limit
+
+ /* add duplicates */
+ if( h->fenc->i_type == X264_TYPE_P )
+ {
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ {
+ x264_weight_t w[3];
+ w[1].weightfn = w[2].weightfn = NULL;
+ if( h->param.rc.b_stat_read )
+ x264_ratecontrol_set_weights( h, h->fenc );
+ else if( h->param.i_threads == 1 )
+ x264_weights_analyse( h, h->fenc, h->fref0[0], 0, 0 );
+
+ if( !h->fenc->weight[0][0].weightfn )
+ {
+ h->fenc->weight[0][0].i_denom = 0;
+ SET_WEIGHT( w[0], 1, 1, 0, -1 );
+ x264_weighted_reference_duplicate( h, 0, w );
+ }
+ else
+ {
+ if( h->fenc->weight[0][0].i_scale == 1<<h->fenc->weight[0][0].i_denom )
+ {
+ SET_WEIGHT( h->fenc->weight[0][0], 1, 1, 0, h->fenc->weight[0][0].i_offset );
+ }
+ x264_weighted_reference_duplicate( h, 0, weight_none );
+ w[0] = h->fenc->weight[0][0];
+ w[0].i_offset--;
+ h->mc.weight_cache( h, &w[0] );
+ x264_weighted_reference_duplicate( h, 0, w );
+ }
+ }
+ else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
+ {
+ // blind weighting: scale=1, offset=-1
+ x264_weight_t w[3];
+ SET_WEIGHT( w[0], 1, 1, 0, -1 );
+ h->fenc->weight[0][0].i_denom = 0;
+ w[1].weightfn = w[2].weightfn = NULL;
+ x264_weighted_reference_duplicate( h, 0, w );
+ }
+ }
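+ /* Net effect in SMART mode (illustrative): when analysis finds a weight, list0
+ * carries the weighted ref, an unweighted duplicate, and a duplicate with the
+ * offset lowered by 1; otherwise just a single blind offset -1 duplicate. */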
+
assert( h->i_ref0 + h->i_ref1 <= 16 );
h->mb.pic.i_fref[0] = h->i_ref0;
h->mb.pic.i_fref[1] = h->i_ref1;
if( h->sh.i_type == SLICE_TYPE_B )
x264_macroblock_bipred_init( h );
+ /*------------------------- Weights -------------------------------------*/
+ x264_weighted_pred_init( h );
+
/* ------------------------ Create slice header ----------------------- */
x264_slice_init( h, i_nal_type, i_global_qp );
for( i = 0; i < 32; i++ )
h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i];
if( h->sh.i_type == SLICE_TYPE_P )
+ {
h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ {
+ for( i = 0; i < 3; i++ )
+ for( j = 0; j < h->i_ref0; j++ )
+ if( h->sh.weight[j][i].i_denom != 0 )
+ {
+ h->stat.i_wpred[i]++;
+ break;
+ }
+ }
+ }
if( h->sh.i_type == SLICE_TYPE_B )
{
h->stat.i_direct_frames[ h->sh.b_direct_spatial_mv_pred ] ++;
}
#endif
+ /* Remove duplicate refs. This must be done near the end, as it breaks the
+ * h->fref0 array by clearing some of its pointers. */
+ for( i = 0; i < h->i_ref0; i++ )
+ if( h->fref0[i] && h->fref0[i]->b_duplicate )
+ {
+ x264_frame_push_blank_unused( h, h->fref0[i] );
+ h->fref0[i] = 0;
+ }
+
if( h->param.psz_dump_yuv )
x264_frame_dump( h );
fixed_pred_modes[i][8] * 100.0 / sum_pred_modes[i] );
}
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%\n",
+ h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
+
for( i_list = 0; i_list < 2; i_list++ )
{
int i_slice;
x264_frame_delete_list( h->frames.unused[0] );
x264_frame_delete_list( h->frames.unused[1] );
x264_frame_delete_list( h->frames.current );
+ x264_frame_delete_list( h->frames.blank_unused );
h = h->thread[0];
{
h->mc.mc_luma( h->mb.pic.p_fdec[0], FDEC_STRIDE,
h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
- mvx, mvy, 16, 16 );
+ mvx, mvy, 16, 16, &h->sh.weight[0][0] );
h->mc.mc_chroma( h->mb.pic.p_fdec[1], FDEC_STRIDE,
h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
mvx, mvy, 8, 8 );
+ if( h->sh.weight[0][1].weightfn )
+ h->sh.weight[0][1].weightfn[8>>2]( h->mb.pic.p_fdec[1], FDEC_STRIDE,
+ h->mb.pic.p_fdec[1], FDEC_STRIDE,
+ &h->sh.weight[0][1], 8 );
+
h->mc.mc_chroma( h->mb.pic.p_fdec[2], FDEC_STRIDE,
h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
mvx, mvy, 8, 8 );
+
+ if( h->sh.weight[0][2].weightfn )
+ h->sh.weight[0][2].weightfn[8>>2]( h->mb.pic.p_fdec[2], FDEC_STRIDE,
+ h->mb.pic.p_fdec[2], FDEC_STRIDE,
+ &h->sh.weight[0][2], 8 );
}
x264_macroblock_encode_skip( h );
/* Motion compensation */
h->mc.mc_luma( h->mb.pic.p_fdec[0], FDEC_STRIDE,
h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
- mvp[0], mvp[1], 16, 16 );
+ mvp[0], mvp[1], 16, 16, &h->sh.weight[0][0] );
}
for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
h->mc.mc_chroma( h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
h->mb.pic.p_fref[0][0][4+ch], h->mb.pic.i_stride[1+ch],
mvp[0], mvp[1], 8, 8 );
+
+ if( h->sh.weight[0][1+ch].weightfn )
+ h->sh.weight[0][1+ch].weightfn[8>>2]( h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
+ h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
+ &h->sh.weight[0][1+ch], 8 );
}
/* there is almost never a termination during chroma, but we can't avoid the check entirely */
const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[i4]][1], h->mb.mv_min[1], h->mb.mv_max[1] );
int nz;
- h->mc.mc_luma( p_fdec, FDEC_STRIDE, h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0], mvx + 4*4*block_idx_x[i4], mvy + 4*4*block_idx_y[i4], 4, 4 );
+ h->mc.mc_luma( p_fdec, FDEC_STRIDE, h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
+ mvx + 4*4*block_idx_x[i4], mvy + 4*4*block_idx_y[i4], 4, 4, &h->sh.weight[i_ref][0] );
if( h->mb.b_lossless )
{
#define COST_MV( mx, my )\
{\
int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE,\
- &p_fref[(my)*stride+(mx)], stride )\
+ &p_fref_w[(my)*stride+(mx)], stride )\
+ BITS_MVD(mx,my);\
COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my );\
}
#define COST_MV_HPEL( mx, my ) \
{ \
int stride2 = 16; \
- uint8_t *src = h->mc.get_ref( pix, &stride2, m->p_fref, stride, mx, my, bw, bh ); \
+ uint8_t *src = h->mc.get_ref( pix, &stride2, m->p_fref, stride, mx, my, bw, bh, &m->weight[0] ); \
int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, src, stride2 ) \
+ p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
COPY3_IF_LT( bpred_cost, cost, bpred_mx, mx, bpred_my, my ); \
#define COST_MV_X3_DIR( m0x, m0y, m1x, m1y, m2x, m2y, costs )\
{\
- uint8_t *pix_base = p_fref + bmx + bmy*stride;\
+ uint8_t *pix_base = p_fref_w + bmx + bmy*stride;\
h->pixf.fpelcmp_x3[i_pixel]( p_fenc,\
pix_base + (m0x) + (m0y)*stride,\
pix_base + (m1x) + (m1y)*stride,\
#define COST_MV_X4_DIR( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs )\
{\
- uint8_t *pix_base = p_fref + bmx + bmy*stride;\
+ uint8_t *pix_base = p_fref_w + bmx + bmy*stride;\
h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
pix_base + (m0x) + (m0y)*stride,\
pix_base + (m1x) + (m1y)*stride,\
#define COST_MV_X4( m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y )\
{\
- uint8_t *pix_base = p_fref + omx + omy*stride;\
+ uint8_t *pix_base = p_fref_w + omx + omy*stride;\
h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
pix_base + (m0x) + (m0y)*stride,\
pix_base + (m1x) + (m1y)*stride,\
#define COST_MV_X3_ABS( m0x, m0y, m1x, m1y, m2x, m2y )\
{\
h->pixf.fpelcmp_x3[i_pixel]( p_fenc,\
- p_fref + (m0x) + (m0y)*stride,\
- p_fref + (m1x) + (m1y)*stride,\
- p_fref + (m2x) + (m2y)*stride,\
+ p_fref_w + (m0x) + (m0y)*stride,\
+ p_fref_w + (m1x) + (m1y)*stride,\
+ p_fref_w + (m2x) + (m2y)*stride,\
stride, costs );\
costs[0] += p_cost_mvx[(m0x)<<2]; /* no cost_mvy */\
costs[1] += p_cost_mvx[(m1x)<<2];\
int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
int omx, omy, pmx, pmy;
uint8_t *p_fenc = m->p_fenc[0];
- uint8_t *p_fref = m->p_fref[0];
+ uint8_t *p_fref_w = m->p_fref_w;
ALIGNED_ARRAY_16( uint8_t, pix,[16*16] );
int i, j;
else
{
int dir = 0;
- uint8_t *pix_base = p_fref + omx + (omy-4*i)*stride;
+ uint8_t *pix_base = p_fref_w + omx + (omy-4*i)*stride;
int dy = i*stride;
#define SADS(k,x0,y0,x1,y1,x2,y2,x3,y3)\
h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15));
int nmvsad = 0, limit;
int sad_thresh = i_me_range <= 16 ? 10 : i_me_range <= 24 ? 11 : 12;
- int bsad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref+bmy*stride+bmx, stride )
+ int bsad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref_w+bmy*stride+bmx, stride )
+ BITS_MVD( bmx, bmy );
for( my = min_y; my <= max_y; my++ )
{
cost_fpel_mvx+min_x, xs, width, bsad*17/16 );
for( i=0; i<xn-2; i+=3 )
{
- uint8_t *ref = p_fref+min_x+my*stride;
+ uint8_t *ref = p_fref_w+min_x+my*stride;
int sads[3];
h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
for( j=0; j<3; j++ )
for( ; i<xn; i++ )
{
int mx = min_x+xs[i];
- int sad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref+mx+my*stride, stride )
+ int sad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref_w+mx+my*stride, stride )
+ cost_fpel_mvx[xs[i]];
if( sad < bsad*sad_thresh>>3 )
{
if( m->i_pixel <= PIXEL_8x8 && h->sh.i_type == SLICE_TYPE_P )
m->cost -= m->i_ref_cost;
refine_subpel( h, m, hpel, qpel, NULL, 1 );
}
#define COST_MV_SAD( mx, my ) \
{ \
int stride = 16; \
- uint8_t *src = h->mc.get_ref( pix[0], &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh ); \
+ uint8_t *src = h->mc.get_ref( pix[0], &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
int cost = h->pixf.fpelcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my ); \
if( b_refine_qpel || (dir^1) != odir ) \
{ \
int stride = 16; \
- uint8_t *src = h->mc.get_ref( pix[0], &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh ); \
+ uint8_t *src = h->mc.get_ref( pix[0], &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
int cost = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
if( b_chroma_me && cost < bcost ) \
{ \
h->mc.mc_chroma( pix[0], 8, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw/2, bh/2 ); \
+ if( m->weight[1].weightfn ) \
+ m->weight[1].weightfn[x264_pixel_size[i_pixel].w>>3]( pix[0], 8, pix[0], 8, \
+ &m->weight[1], x264_pixel_size[i_pixel].h>>1 ); \
cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix[0], 8 ); \
if( cost < bcost ) \
{ \
h->mc.mc_chroma( pix[0], 8, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw/2, bh/2 ); \
+ if( m->weight[2].weightfn ) \
+ m->weight[2].weightfn[x264_pixel_size[i_pixel].w>>3]( pix[0], 8, pix[0], 8, \
+ &m->weight[2], x264_pixel_size[i_pixel].h>>1 ); \
cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix[0], 8 ); \
} \
} \
if( cost < bcost ) \
int costs[4];
int stride = 32; // candidates are either all hpel or all qpel, so one stride is enough
uint8_t *src0, *src1, *src2, *src3;
- src0 = h->mc.get_ref( pix[0], &stride, m->p_fref, m->i_stride[0], omx, omy-2, bw, bh+1 );
- src2 = h->mc.get_ref( pix[1], &stride, m->p_fref, m->i_stride[0], omx-2, omy, bw+4, bh );
+ src0 = h->mc.get_ref( pix[0], &stride, m->p_fref, m->i_stride[0], omx, omy-2, bw, bh+1, &m->weight[0] );
+ src2 = h->mc.get_ref( pix[1], &stride, m->p_fref, m->i_stride[0], omx-2, omy, bw+4, bh, &m->weight[0] );
src1 = src0 + stride;
src3 = src2 + 1;
h->pixf.fpelcmp_x4[i_pixel]( m->p_fenc[0], src0, src1, src2, src3, stride, costs );
int mvx = om##list##x+dx;\
int mvy = om##list##y+dy;\
stride##list[i] = bw;\
- src##list[i] = h->mc.get_ref( pixy_buf[list][i], &stride##list[i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh ); \
+ src##list[i] = h->mc.get_ref( pixy_buf[list][i], &stride##list[i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none ); \
if( rd )\
{\
h->mc.mc_chroma( pixu_buf[list][i], 8, m->p_fref[4], m->i_stride[1], mvx, mvy + mv##list##y_offset, bw>>1, bh>>1 );\
if( !avoid_mvp || !(mx == pmx && my == pmy) ) \
{ \
int stride = 16; \
- uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4 ); \
+ uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4, &m->weight[0] ); \
dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[mx] + p_cost_mvy[my]; \
COPY1_IF_LT( bsatd, dst ); \
typedef struct
{
+ /* aligning the first member is a gcc hack to force the struct to be
+ * 16 byte aligned, as well as force sizeof(struct) to be a multiple of 16 */
/* input */
- int i_pixel; /* PIXEL_WxH */
+ ALIGNED_16( int i_pixel ); /* PIXEL_WxH */
uint16_t *p_cost_mv; /* lambda * nbits for each possible mv */
int i_ref_cost;
int i_ref;
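+ /* weights to apply to the reference during ME (weight_none if unweighted) */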
+ const x264_weight_t *weight;
uint8_t *p_fref[6];
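+ /* fullpel luma plane with the weight pre-applied; equals p_fref[0] when
+ * no weight is in use */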
+ uint8_t *p_fref_w;
uint8_t *p_fenc[3];
uint16_t *integral;
int i_stride[2];
int s_count;
float blurred_complexity;
char direct_mode;
+ int8_t weight[2];
+ int8_t i_weight_denom;
int refcount[16];
int refs;
} ratecontrol_entry_t;
{
ratecontrol_entry_t *rce = h->rc->rce;
x264_frame_t *frames[16];
+ x264_weight_t weights[16][3];
+ int refcount[16];
int ref, i;
if( rce->refs != h->i_ref0 )
return -1;
memcpy( frames, h->fref0, sizeof(frames) );
+ memcpy( refcount, rce->refcount, sizeof(refcount) );
+ memcpy( weights, h->fenc->weight, sizeof(weights) );
+ memset( h->fenc->weight, 0, sizeof(h->fenc->weight) );
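+ /* The weights were saved above and cleared from the frame; they are copied
+ * back reference-by-reference as the list is reordered below. */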
/* For now don't reorder ref 0; it seems to lower quality
in most cases due to skips. */
{
int max = -1;
int bestref = 1;
+
for( i = 1; i < h->i_ref0; i++ )
- /* Favor lower POC as a tiebreaker. */
- COPY2_IF_GT( max, rce->refcount[i], bestref, i );
- rce->refcount[bestref] = -1;
+ if( !frames[i]->b_duplicate || frames[i]->i_frame != h->fref0[ref-1]->i_frame )
+ /* Favor lower POC as a tiebreaker. */
+ COPY2_IF_GT( max, refcount[i], bestref, i );
+
+ /* FIXME: If there are duplicates from frames other than ref0 then it is possible
+ * that the optimal ordering doesn't place every duplicate. */
+
+ refcount[bestref] = -1;
h->fref0[ref] = frames[bestref];
+ memcpy( h->fenc->weight[ref], weights[bestref], sizeof(weights[bestref]) );
}
return 0;
return -1;
}
+ if( ( p = strstr( opts, "wpredp=" ) ) && sscanf( p, "wpredp=%d", &i ) &&
+ X264_MAX( 0, h->param.analyse.i_weighted_pred ) != i )
+ {
+ x264_log( h, X264_LOG_ERROR, "different weightp option than 1st pass (had weightp=%d)\n", i );
+ return -1;
+ }
+
/* since B-adapt doesn't (yet) take into account B-pyramid,
* the converse is not a problem */
if( h->param.i_bframe )
}
rce->refs = ref;
+ /* find weights */
+ rce->i_weight_denom = -1;
+ char *w = strchr( p, 'w' );
+ if( w )
+ if( sscanf( w, "w:%hhd,%hhd,%hhd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
+ rce->i_weight_denom = -1;
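+ /* e.g. "w:6,96,-2" restores denom=6, scale=96, offset=-2, the order written
+ * out by x264_ratecontrol_end below */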
+
switch(pict_type)
{
case 'I': rce->kept_as_ref = 1;
}
}
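+/* Apply the weights parsed from the 1st-pass stats to the frame about to be
+ * encoded; a no-op unless weightp is enabled. */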
+void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
+{
+ ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame];
+ if( h->param.analyse.i_weighted_pred <= 0 )
+ return;
+ if( rce->i_weight_denom >= 0 )
+ SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0], rce->i_weight_denom, rce->weight[1] );
+}
+
/* After encoding one frame, save stats and update ratecontrol state */
int x264_ratecontrol_end( x264_t *h, int bits )
{
c_direct) < 0 )
goto fail;
- for( i = 0; i < h->i_ref0; i++ )
+ /* Only write information for reference reordering once. */
+ int use_old_stats = h->param.rc.b_stat_read && rc->rce->refs > 1;
+ for( i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref0); i++ )
{
- int refcount = h->param.b_interlaced ? h->stat.frame.i_mb_count_ref[0][i*2]
- + h->stat.frame.i_mb_count_ref[0][i*2+1] :
- h->stat.frame.i_mb_count_ref[0][i];
+ int refcount = use_old_stats ? rc->rce->refcount[i]
+ : h->param.b_interlaced ? h->stat.frame.i_mb_count_ref[0][i*2]
+ + h->stat.frame.i_mb_count_ref[0][i*2+1]
+ : h->stat.frame.i_mb_count_ref[0][i];
if( fprintf( rc->p_stat_file_out, "%d ", refcount ) < 0 )
goto fail;
}
- if( fprintf( rc->p_stat_file_out, ";\n" ) < 0 )
+ if( h->sh.weight[0][0].weightfn )
+ {
+ if( fprintf( rc->p_stat_file_out, "w:%d,%d,%d", h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
+ goto fail;
+ }
+
+ if( fprintf( rc->p_stat_file_out, ";\n" ) < 0 )
goto fail;
/* Don't re-write the data in multi-pass mode. */
void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead );
int x264_ratecontrol_slice_type( x264_t *, int i_frame );
+void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm );
void x264_ratecontrol_mb( x264_t *, int bits );
int x264_ratecontrol_qp( x264_t * );
int x264_ratecontrol_end( x264_t *, int bits );
void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
int x264_ratecontrol_get_estimated_size( x264_t const *);
int x264_rc_analyse_slice( x264_t *h );
+int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
#endif
sps->i_profile_idc = PROFILE_HIGH444_PREDICTIVE;
else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT )
sps->i_profile_idc = PROFILE_HIGH;
- else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced )
+ else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->analyse.i_weighted_pred > 0 )
sps->i_profile_idc = PROFILE_MAIN;
else
sps->i_profile_idc = PROFILE_BASELINE;
pps->i_num_ref_idx_l0_active = 1;
pps->i_num_ref_idx_l1_active = 1;
- pps->b_weighted_pred = 0;
+ pps->b_weighted_pred = param->analyse.i_weighted_pred > 0;
pps->b_weighted_bipred = param->analyse.b_weighted_bipred ? 2 : 0;
pps->i_pic_init_qp = param->rc.i_rc_method == X264_RC_ABR ? 26 : param->rc.i_qp_constant;
*****************************************************************************
* Copyright (C) 2005-2008 x264 project
*
- * Authors: Loren Merritt <lorenm@u.washington.edu>
- * Fiona Glaser <fiona@x264.com>
+ * Authors: Fiona Glaser <fiona@x264.com>
+ * Loren Merritt <lorenm@u.washington.edu>
+ * Dylan Yudaken <dyudaken@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include "macroblock.h"
#include "me.h"
+static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
+ x264_frame_t **frames, int p0, int p1, int b,
+ int b_intra_penalty );
static void x264_lowres_context_init( x264_t *h, x264_mb_analysis_t *a )
{
h->mb.b_chroma_me = 0;
}
+/* Convert a non-H.264 weight (i.e. a multiplier with an implicit denominator of 128) into an H.264 weight. */
+static void get_h264_weight( unsigned int weight_nonh264, int offset, x264_weight_t *w )
+{
+ w->i_offset = offset;
+ w->i_denom = 7;
+ w->i_scale = weight_nonh264;
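+ /* Reduce while the scale is even (lossless) or too large to code, e.g.
+ * 96/128 -> 48/64 -> 24/32 -> 12/16 -> 6/8 -> 3/4, giving i_scale = 3,
+ * i_denom = 2. */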
+ while( w->i_denom > 0 && (w->i_scale > 127 || !(w->i_scale & 1)) )
+ {
+ w->i_denom--;
+ w->i_scale >>= 1;
+ }
+ w->i_scale = X264_MIN( w->i_scale, 127 );
+}
+/* Due to a GCC bug on some platforms (win32), a 16-byte static array may not actually be 16-byte aligned, hence the 17th element. */
+ALIGNED_16( static uint8_t flat[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+
+static void weights_plane_analyse( x264_t *h, uint8_t *plane, int width, int height, int stride, unsigned int *sum, uint64_t *var )
+{
+ int x,y;
+ unsigned int sad = 0;
+ uint64_t ssd = 0;
+ uint8_t *p = plane;
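+ /* SAD against the all-zero 'flat' block is just the pixel sum, and SSD the
+ * sum of squares; mean and variance follow directly from the two. */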
+ for( y = 0; y < height>>4; y++, p += stride*16 )
+ for( x = 0; x < width; x+=16 )
+ {
+ sad += h->pixf.sad_aligned[PIXEL_16x16]( p + x, stride, flat, 0 );
+ ssd += h->pixf.ssd[PIXEL_16x16]( p + x, stride, flat, 0 );
+ }
+
+ *sum = sad;
+ *var = ssd - (uint64_t) sad * sad / ( width * height );
+ x264_emms();
+}
+
+#define LOAD_HPELS_LUMA(dst, src) \
+{ \
+ (dst)[0] = &(src)[0][i_pel_offset]; \
+ (dst)[1] = &(src)[1][i_pel_offset]; \
+ (dst)[2] = &(src)[2][i_pel_offset]; \
+ (dst)[3] = &(src)[3][i_pel_offset]; \
+}
+
+static uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest, int b_lowres )
+{
+ uint8_t **ref_planes = b_lowres ? ref->lowres : ref->filtered;
+ int ref0_distance = fenc->i_frame - ref->i_frame - 1;
+ /* Note: this branch never runs during lookahead, as weights_analyse is
+ * only called there before any motion search has been done. */
+ if( h->frames.b_have_lowres && fenc->lowres_mvs[0][ref0_distance][0][0] != 0x7FFF
+ && ( h->param.analyse.i_subpel_refine || h->param.i_threads > 1 ))
+ {
+ uint8_t *src[4];
+ int i_stride = b_lowres ? fenc->i_stride_lowres : fenc->i_stride[0];
+ int i_lines = b_lowres ? fenc->i_lines_lowres : fenc->i_lines[0];
+ int i_width = b_lowres ? fenc->i_width_lowres : fenc->i_width[0];
+ int i_mb_xy = 0;
+ int mbsizeshift = b_lowres ? 3 : 4;
+ int mbsize = 1 << mbsizeshift;
+ int x,y;
+ int i_pel_offset = 0;
+
+ for( y = 0; y < i_lines; y += mbsize, i_pel_offset = y*i_stride )
+ for( x = 0; x < i_width; x += mbsize, i_mb_xy++, i_pel_offset += mbsize )
+ {
+ uint8_t *pix = &dest[ i_pel_offset ];
+ int mvx = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][0] << !b_lowres;
+ int mvy = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][1] << !b_lowres;
+ LOAD_HPELS_LUMA( src, ref_planes );
+ h->mc.mc_luma( pix, i_stride, src, i_stride,
+ mvx, mvy, mbsize, mbsize, weight_none );
+ }
+ return dest;
+ }
+ return ref_planes[0];
+}
+#undef LOAD_HPELS_LUMA
+
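+/* Estimate the cost of coding fenc against the given (possibly weighted)
+ * reference: per-MB mbcmp, clamped by the MB's intra cost, plus the bits
+ * needed to signal the weight in each slice header when w is non-NULL. */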
+static unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w, int b_lowres )
+{
+ int x, y;
+ unsigned int cost = 0;
+ int mbsize = b_lowres ? 8 : 16;
+ int pixelsize = mbsize == 8 ? PIXEL_8x8 : PIXEL_16x16;
+ int i_stride = b_lowres ? fenc->i_stride_lowres : fenc->i_stride[0];
+ int i_lines = b_lowres ? fenc->i_lines_lowres : fenc->i_lines[0];
+ int i_width = b_lowres ? fenc->i_width_lowres : fenc->i_width[0];
+ uint8_t *fenc_plane = b_lowres ? fenc->lowres[0] : fenc->plane[0];
+ ALIGNED_16( uint8_t buf[16*16] );
+ int pixoff = 0;
+ int i_mb = 0;
+
+ if( w )
+ for( y = 0; y < i_lines; y += mbsize, pixoff = ( y*i_stride ) )
+ for( x = 0; x < i_width; x += mbsize, i_mb++, pixoff += mbsize)
+ {
+ w->weightfn[mbsize>>2]( buf, 16, &src[pixoff], i_stride, w, mbsize );
+ cost += X264_MIN( h->pixf.mbcmp[pixelsize]( buf, 16, &fenc_plane[pixoff], i_stride ), fenc->i_intra_cost[i_mb] );
+ }
+ else
+ for( y = 0; y < i_lines; y += mbsize, pixoff = ( y*i_stride ) )
+ for( x = 0; x < i_width; x+=mbsize, i_mb++, pixoff += mbsize )
+ cost += X264_MIN( h->pixf.mbcmp[pixelsize]( &src[pixoff], i_stride, &fenc_plane[pixoff], i_stride ), fenc->i_intra_cost[i_mb] );
+
+ int lambda = b_lowres ? 1 : 4;
+ if( w )
+ {
+ int numslices;
+ if( h->param.i_slice_count )
+ numslices = h->param.i_slice_count;
+ else if ( h->param.i_slice_max_mbs )
+ numslices = ( h->sps->i_mb_width * h->sps->i_mb_height + h->param.i_slice_max_mbs-1 ) / h->param.i_slice_max_mbs;
+ else
+ numslices = 1;
+ // FIXME: still need to calculate for --slice-max-size.
+ // Multiply by 2 because using a weight implies coding a duplicate reference
+ // as well; the extra 10 bits approximate the cost of signalling it.
+ cost += lambda * numslices * ( 10 + 2 * ( bs_size_ue( w[0].i_denom ) + bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset ) ) );
+ }
+ return cost;
+}
+
+void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lowres, int b_lookahead )
+{
+ unsigned int fenc_sum, ref_sum;
+ float fenc_mean, ref_mean;
+ uint64_t fenc_var, ref_var;
+ int i_off, offset_search;
+ int minoff, minscale, mindenom;
+ unsigned int minscore, origscore;
+ int i_delta_index = fenc->i_frame - ref->i_frame - 1;
+ /* epsilon is chosen so the scale must differ from 1 by at least 1/128 (e.g. a numerator of 127 with denominator 128) */
+ const float epsilon = 1.0/128.0;
+
+ float guess_scale;
+ int found;
+ x264_weight_t *weights = fenc->weight[0];
+
+ weights_plane_analyse( h, fenc->plane[0], fenc->i_width[0], fenc->i_lines[0], fenc->i_stride[0], &fenc_sum, &fenc_var );
+ weights_plane_analyse( h, ref->plane[0], ref->i_width[0], ref->i_lines[0], ref->i_stride[0], &ref_sum, &ref_var );
+ fenc_var = round( sqrt( fenc_var ) );
+ ref_var = round( sqrt( ref_var ) );
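+ /* reduce the sums of squared deviations to their square roots so that
+ * their ratio estimates the scale */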
+ fenc_mean = (float)fenc_sum / ( fenc->i_lines[0] * fenc->i_width[0] );
+ ref_mean = (float)ref_sum / ( fenc->i_lines[0] * fenc->i_width[0] );
+
+ /* early termination: the means already match and the variance ratio is
+ * within epsilon of 1, so no useful weight exists */
+ if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - ( (float)fenc_var / ref_var ) ) < epsilon )
+ return;
+
+ guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
+ get_h264_weight( round( guess_scale * 128 ), 0, &weights[0] );
+
+ found = 0;
+ mindenom = weights[0].i_denom;
+ minscale = weights[0].i_scale;
+ minoff = 0;
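+ /* Initial offset guess: fenc_mean - scale*ref_mean; the +0.5 rounding bias
+ * applies only in lookahead, where a single offset is tested. */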
+ offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
+
+ if( !fenc->b_intra_calculated )
+ {
+ x264_mb_analysis_t a;
+ x264_lowres_context_init( h, &a );
+ x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
+ }
+ uint8_t *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0], b_lowres );
+ origscore = minscore = x264_weight_cost( h, fenc, mcbuf, 0, b_lowres );
+
+ if( !minscore )
+ return;
+
+ // Testing an extra offset gives a slight improvement due to rounding
+ // error, but lookahead tries only the single guessed offset.
+ // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
+ for( i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
+ {
+ SET_WEIGHT( weights[0], 1, minscale, mindenom, i_off );
+ unsigned int s = x264_weight_cost( h, fenc, mcbuf, &weights[0], b_lowres );
+ COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
+ }
+ x264_emms();
+
+ /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
+ if( !found || ( minscale == 1<<mindenom && minoff == 0 ) || minscore >= fenc->i_width[0] * fenc->i_lines[0] * ( b_lowres ? 2 : 8 ) )
+ {
+ SET_WEIGHT( weights[0], 0, 1, 0, 0 );
+ return;
+ }
+ else
+ SET_WEIGHT( weights[0], 1, minscale, mindenom, minoff );
+
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn )
+ fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
+
+ if( weights[0].weightfn && b_lookahead )
+ {
+ //scale lowres in lookahead for slicetype_frame_cost
+ int i_padv = PADV<<h->param.b_interlaced;
+ uint8_t *src = ref->buffer_lowres[0];
+ uint8_t *dst = h->mb.p_weight_buf[0];
+ int width = ref->i_width_lowres + PADH*2;
+ int height = ref->i_lines_lowres + i_padv*2;
+ x264_weight_scale_plane( h, dst, ref->i_stride_lowres, src, ref->i_stride_lowres,
+ width, height, &weights[0] );
+ fenc->weighted[0] = h->mb.p_weight_buf[0] + PADH + ( ref->i_stride_lowres * i_padv );
+ }
+}
+
static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
- x264_frame_t **frames, int p0, int p1, int b,
- int dist_scale_factor, int do_search[2] )
+ x264_frame_t **frames, int p0, int p1, int b,
+ int dist_scale_factor, int do_search[2], const x264_weight_t *w )
{
x264_frame_t *fref0 = frames[p0];
x264_frame_t *fref1 = frames[p1];
(dst)[2] = &(src)[2][i_pel_offset]; \
(dst)[3] = &(src)[3][i_pel_offset]; \
}
+#define LOAD_WPELS_LUMA(dst,src) \
+ (dst) = &(src)[i_pel_offset];
+
#define CLIP_MV( mv ) \
{ \
mv[0] = x264_clip3( mv[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); \
uint8_t *src1, *src2; \
int i_cost; \
src1 = h->mc.get_ref( pix1, &stride1, m[0].p_fref, m[0].i_stride[0], \
- (mv0)[0], (mv0)[1], 8, 8 ); \
+ (mv0)[0], (mv0)[1], 8, 8, w ); \
src2 = h->mc.get_ref( pix2, &stride2, m[1].p_fref, m[1].i_stride[0], \
- (mv1)[0], (mv1)[1], 8, 8 ); \
+ (mv1)[0], (mv1)[1], 8, 8, w ); \
h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
m[0].p_cost_mv = a->p_cost_mv;
m[0].i_stride[0] = i_stride;
m[0].p_fenc[0] = h->mb.pic.p_fenc[0];
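+ /* Point list-0 ME at the pre-weighted lowres plane built by
+ * weights_analyse, so lookahead costs reflect weighted prediction. */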
+ m[0].weight = w;
LOAD_HPELS_LUMA( m[0].p_fref, fref0->lowres );
+ m[0].p_fref_w = m[0].p_fref[0];
+ if( w[0].weightfn )
+ LOAD_WPELS_LUMA( m[0].p_fref_w, fenc->weighted[0] );
if( b_bidir )
{
int dmv[2][2];
h->mc.memcpy_aligned( &m[1], &m[0], sizeof(x264_me_t) );
+ m[1].i_ref = p1;
+ m[1].weight = weight_none;
LOAD_HPELS_LUMA( m[1].p_fref, fref1->lowres );
+ m[1].p_fref_w = m[1].p_fref[0];
dmv[0][0] = ( mvr[0] * dist_scale_factor + 128 ) >> 8;
dmv[0][1] = ( mvr[1] * dist_scale_factor + 128 ) >> 8;
h->sps->i_mb_width * h->sps->i_mb_height)
static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
- x264_frame_t **frames, int p0, int p1, int b,
- int b_intra_penalty )
+ x264_frame_t **frames, int p0, int p1, int b,
+ int b_intra_penalty )
{
int i_score = 0;
/* Don't use the AQ'd scores for slicetype decision. */
int i_score_aq = 0;
int do_search[2];
-
+ const x264_weight_t *w = weight_none;
/* Check whether we already evaluated this frame
* If we have tried this frame as P, then we have also tried
* the preceding frames as B. (is this still true?) */
/* For each list, check to see whether we have lowres motion-searched this reference frame before. */
do_search[0] = b != p0 && frames[b]->lowres_mvs[0][b-p0-1][0][0] == 0x7FFF;
do_search[1] = b != p1 && frames[b]->lowres_mvs[1][p1-b-1][0][0] == 0x7FFF;
- if( do_search[0] ) frames[b]->lowres_mvs[0][b-p0-1][0][0] = 0;
+ if( do_search[0] )
+ {
+ if( ( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART
+ || h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE ) && b == p1 )
+ {
+ x264_weights_analyse( h, frames[b], frames[p0], 1, 1 );
+ w = frames[b]->weight[0];
+ }
+ frames[b]->lowres_mvs[0][b-p0-1][0][0] = 0;
+ }
if( do_search[1] ) frames[b]->lowres_mvs[1][p1-b-1][0][0] = 0;
if( b == p1 )
row_satd[ h->mb.i_mb_y ] = 0;
for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
{
- int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search );
+ int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
int i_mb_cost_aq = i_mb_cost;
if( h->param.rc.i_aq_mode )
i_mb_cost_aq = (i_mb_cost_aq * frames[b]->i_inv_qscale_factor[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride] + 128) >> 8;
for( h->mb.i_mb_y = h->sps->i_mb_height - 2; h->mb.i_mb_y > 0; h->mb.i_mb_y-- )
for( h->mb.i_mb_x = h->sps->i_mb_width - 2; h->mb.i_mb_x > 0; h->mb.i_mb_x-- )
{
- int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search );
+ int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
int i_mb_cost_aq = i_mb_cost;
if( h->param.rc.i_aq_mode )
i_mb_cost_aq = (i_mb_cost_aq * frames[b]->i_inv_qscale_factor[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride] + 128) >> 8;
return i_score;
}
-static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame )
+static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, int ref0_distance )
{
int mb_index;
x264_emms();
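+ /* f_weighted_cost_delta is the weighted/unweighted cost ratio from fade
+ * analysis; when no real weights can be coded (WEIGHTP_FAKE), lower qp in
+ * fades by the equivalent amount instead. */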
+ float weightdelta = 0.0;
+ if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
+ weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
+
/* Allow the strength to be adjusted via qcompress, since the two
* concepts are very similar. */
float strength = 5.0f * (1.0f - h->param.rc.f_qcompress);
{
int propagate_cost = frame->i_propagate_cost[mb_index];
float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost);
- frame->f_qp_offset[mb_index] = frame->f_qp_offset_aq[mb_index] - strength * log2_ratio;
+ frame->f_qp_offset[mb_index] = frame->f_qp_offset_aq[mb_index] - strength * ( log2_ratio + weightdelta );
}
}
}
}
if( h->param.rc.i_vbv_buffer_size && b == p1 )
- x264_macroblock_tree_finish( h, frames[b] );
+ x264_macroblock_tree_finish( h, frames[b], b-p0 );
}
static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
last_nonb = cur_nonb;
}
- x264_macroblock_tree_finish( h, frames[last_nonb] );
+ x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
}
static int x264_vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b )
}
}
+ /* Analyse for weighted P frames */
+ if( h->lookahead->next.list[bframes]->i_type == X264_TYPE_P )
+ {
+ memset( h->lookahead->next.list[bframes]->weight, 0, sizeof(h->lookahead->next.list[bframes]->weight) );
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->param.i_threads > 1 )
+ x264_weights_analyse( h, h->lookahead->next.list[bframes], h->lookahead->last_nonb, 1, 0 );
+ }
+
/* shift sequence to coded order.
use a small temporary list to avoid shifting the entire next buffer around */
int i_dts = h->lookahead->next.list[0]->i_frame;
#define MC_TEST_LUMA( w, h ) \
if( mc_a.mc_luma != mc_ref.mc_luma && !(w&(w-1)) && h<=16 ) \
{ \
+ const x264_weight_t *weight = weight_none; \
set_func_name( "mc_luma_%dx%d", w, h );\
used_asm = 1; \
memset(buf3, 0xCD, 1024); \
memset(buf4, 0xCD, 1024); \
- call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h ); \
- call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h ); \
+ call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
+ call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h, weight ); \
if( memcmp( buf3, buf4, 1024 ) ) \
{ \
fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
{ \
uint8_t *ref = dst2; \
int ref_stride = 32; \
+ const x264_weight_t *weight = weight_none; \
set_func_name( "get_ref_%dx%d", w, h );\
used_asm = 1; \
memset(buf3, 0xCD, 1024); \
memset(buf4, 0xCD, 1024); \
- call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h ); \
- ref = (uint8_t*) call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h ); \
+ call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
+ ref = (uint8_t*) call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
for( i=0; i<h; i++ ) \
if( memcmp( dst1+i*32, ref+i*ref_stride, w ) ) \
{ \
MC_TEST_AVG( avg, w );
report( "mc wpredb :" );
+#define MC_TEST_WEIGHT( name, weight, aligned ) \
+ int align_off = (aligned ? 0 : rand()%16); \
+ for( i = 1, ok = 1, used_asm = 0; i <= 5; i++ ) \
+ { \
+ ALIGNED_16( uint8_t buffC[640] ); \
+ ALIGNED_16( uint8_t buffA[640] ); \
+ j = X264_MAX( i*4, 2 ); \
+ memset( buffC, 0, 640 ); \
+ memset( buffA, 0, 640 ); \
+ x264_t ha; \
+ ha.mc = mc_a; \
+ /* w12 is the same as w16 in some cases */ \
+ if( i == 3 && mc_a.name[i] == mc_a.name[i+1] ) \
+ continue; \
+ if( mc_a.name[i] != mc_ref.name[i] ) \
+ { \
+ int k; \
+ set_func_name( "%s_w%d", #name, j ); \
+ used_asm = 1; \
+ call_c1( mc_c.weight[i], buffC, 32, buf2+align_off, 32, &weight, 16 ); \
+ mc_a.weight_cache(&ha, &weight); \
+ call_a1( weight.weightfn[i], buffA, 32, buf2+align_off, 32, &weight, 16 ); \
+ for( k = 0; k < 16; k++ ) \
+ if( memcmp( &buffC[k*32], &buffA[k*32], j ) ) \
+ { \
+ ok = 0; \
+ fprintf( stderr, #name "[%d]: [FAILED] s:%d o:%d d:%d\n", i, s, o, d ); \
+ break; \
+ } \
+ call_c2( mc_c.weight[i], buffC, 32, buf2+align_off, 32, &weight, 16 ); \
+ call_a2( weight.weightfn[i], buffA, 32, buf2+align_off, 32, &weight, 16 ); \
+ } \
+ }
+
+ ok = 1; used_asm = 0;
+
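+ /* Sweep a sparse random sample of (scale, denom, offset) triples and check
+ * each asm weight function against the C reference. */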
+ int s,o,d;
+ int align_cnt = 0;
+ for( s = 0; s <= 127 && ok; s++ )
+ {
+ for( o = -128; o <= 127 && ok; o++ )
+ {
+ if( rand() & 2047 ) continue;
+ for( d = 0 ; d <= 7 && ok; d++ )
+ {
+ if( s == 1<<d )
+ continue;
+ x264_weight_t weight = { .i_scale = s, .i_denom = d, .i_offset = o };
+ MC_TEST_WEIGHT( weight, weight, (align_cnt++ % 4) );
+ }
+ }
+
+ }
+ report( "mc weight :" );
+
+ ok = 1; used_asm = 0;
+ s = 1; d = 0;
+ for( o = 0; o <= 127 && ok; o++ )
+ {
+ if( rand() & 15 ) continue;
+ x264_weight_t weight = { .i_scale = 1, .i_denom = 0, .i_offset = o };
+ MC_TEST_WEIGHT( offsetadd, weight, (align_cnt++ % 4) );
+ }
+ report( "mc offsetadd :" );
+ ok = 1; used_asm = 0;
+ for( o = -128; o < 0 && ok; o++ )
+ {
+ if( rand() & 15 ) continue;
+ x264_weight_t weight = { .i_scale = 1, .i_denom = 0, .i_offset = o };
+ MC_TEST_WEIGHT( offsetsub, weight, (align_cnt++ % 4) );
+ }
+ report( "mc offsetsub :" );
+
if( mc_a.hpel_filter != mc_ref.hpel_filter )
{
uint8_t *src = buf1+8+2*64;
call_c( mc_c.mbtree_propagate_cost, dstc, prop, intra, inter, qscale, 400 );
call_a( mc_a.mbtree_propagate_cost, dsta, prop, intra, inter, qscale, 400 );
// I don't care about exact rounding, this is just how close the floating-point implementation happens to be
+ x264_emms();
for( i=0; i<400; i++ )
ok &= abs(dstc[i]-dsta[i]) <= (abs(dstc[i])>512) || fabs((double)dstc[i]/dsta[i]-1) < 1e-6;
report( "mbtree propagate :" );
" - none, spatial, temporal, auto\n",
strtable_lookup( x264_direct_pred_names, defaults->analyse.i_direct_mv_pred ) );
H2( " --no-weightb Disable weighted prediction for B-frames\n" );
+ H1( " --weightp Weighted prediction for P-frames [2]\n"
+ " - 0: Disabled\n"
+ " - 1: Blind offset\n"
+ " - 2: Smart analysis\n");
H1( " --me <string> Integer pixel motion estimation method [\"%s\"]\n",
strtable_lookup( x264_motion_est_names, defaults->analyse.i_me_method ) );
H2( " - dia: diamond search, radius 1 (fast)\n"
{ "direct", required_argument, NULL, 0 },
{ "weightb", no_argument, NULL, 'w' },
{ "no-weightb", no_argument, NULL, 0 },
+ { "weightp", required_argument, NULL, 0 },
{ "me", required_argument, NULL, 0 },
{ "merange", required_argument, NULL, 0 },
{ "mvrange", required_argument, NULL, 0 },
param->analyse.i_trellis = 0;
param->i_bframe_adaptive = X264_B_ADAPT_NONE;
param->rc.b_mb_tree = 0;
+ param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
}
else if( !strcasecmp( optarg, "veryfast" ) )
{
param->analyse.b_mixed_references = 0;
param->analyse.i_trellis = 0;
param->rc.b_mb_tree = 0;
+ param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
}
else if( !strcasecmp( optarg, "faster" ) )
{
param->i_frame_reference = 2;
param->analyse.i_subpel_refine = 4;
param->rc.b_mb_tree = 0;
+ param->analyse.i_weighted_pred = X264_WEIGHTP_BLIND;
}
else if( !strcasecmp( optarg, "fast" ) )
{
param->b_deblocking_filter = 0;
param->b_cabac = 0;
param->analyse.b_weighted_bipred = 0;
+ param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
}
else if( !strcasecmp( optarg, "touhou" ) )
{
param->b_cabac = 0;
param->i_cqm_preset = X264_CQM_FLAT;
param->i_bframe = 0;
+ param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
if( param->b_interlaced )
{
fprintf( stderr, "x264 [error]: baseline profile doesn't support interlacing\n" );
#include <stdarg.h>
-#define X264_BUILD 78
+#define X264_BUILD 79
/* x264_t:
* opaque handler for encoder */
#define X264_B_ADAPT_NONE 0
#define X264_B_ADAPT_FAST 1
#define X264_B_ADAPT_TRELLIS 2
+#define X264_WEIGHTP_NONE 0
+#define X264_WEIGHTP_BLIND 1
+#define X264_WEIGHTP_SMART 2
#define X264_B_PYRAMID_NONE 0
#define X264_B_PYRAMID_STRICT 1
#define X264_B_PYRAMID_NORMAL 2
unsigned int inter; /* inter partitions */
int b_transform_8x8;
+ int i_weighted_pred; /* weighting for P-frames */
int b_weighted_bipred; /* implicit weighting for B-frames */
int i_direct_mv_pred; /* spatial vs temporal mv prediction */
int i_chroma_qp_offset;