param->rc.i_qp_max = 51;
param->rc.i_qp_step = 4;
param->rc.f_ip_factor = 1.4;
- param->rc.f_pb_factor = 1.4;
+ param->rc.f_pb_factor = 1.3;
param->rc.b_stat_write = 0;
param->rc.psz_stat_out = "x264_2pass.log";
/* */
param->analyse.intra = X264_ANALYSE_I4x4;
- param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16;
+ param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
+ param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL;
param->analyse.i_subpel_refine = 1;
param->analyse.b_psnr = 1;
}
/* MB table and cache for current frame/mb */
struct
{
+ int i_mb_count; /* number of mbs in a frame */
+
/* Strides */
int i_mb_stride;
int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */
int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
- int16_t (*mvr[2][16])[2]; /* mb mv for each possible ref */
+ int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */
+ int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
+
+ /* for B_SKIP and B_DIRECT motion prediction */
+ struct
+ {
+ int16_t (*mv)[2]; /* keep only L0 */
+ int8_t *ref;
+ } list1ref0;
/* current value */
int i_type;
/* 0 if non avaible */
int16_t mv[2][X264_SCAN8_SIZE][2];
int16_t mvd[2][X264_SCAN8_SIZE][2];
+
+ /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
+ int8_t skip[X264_SCAN8_SIZE];
+
+ int16_t direct_mv[2][X264_SCAN8_SIZE][2];
+ int8_t direct_ref[2][X264_SCAN8_SIZE];
} cache;
/* */
}
}
+/* Temporal direct prediction: derive L0/L1 mvs for a B_SKIP/B_DIRECT mb by
+ * scaling the co-located list1-ref0 block's mvs according to POC distances
+ * (tb = current->ref0, td = ref1->ref0). Always returns 1 (available). */
+static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
+{
+    int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
+    int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
+    int i;
+
+    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
+
+    for( i = 0; i < 4; i++ )
+    {
+        const int x8 = 2*(i%2);
+        const int y8 = 2*(i/2);
+        /* TODO: MapColToList0 */
+        const int i_ref = h->mb.list1ref0.ref[ i_mb_8x8 + x8/2 + y8 * h->mb.i_mb_stride ];
+
+        if( i_ref == -1 )
+        {
+            /* co-located block is intra: predict zero motion with ref 0 */
+            x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, 0 );
+            x264_macroblock_cache_mv( h, x8, y8, 2, 2, 0, 0, 0 );
+            x264_macroblock_cache_mv( h, x8, y8, 2, 2, 1, 0, 0 );
+        }
+        else
+        {
+            int tb = x264_clip3( h->fdec->i_poc - h->fref0[i_ref]->i_poc, -128, 127 );
+            int td = x264_clip3( h->fref1[0]->i_poc - h->fref0[i_ref]->i_poc, -128, 127 );
+            int tx = 0;
+            int dist_scale_factor = 0;
+            int x4, y4;
+
+            /* BUGFIX: the scale factor must only be derived when td != 0;
+             * the old code divided by td unconditionally, which is a
+             * division by zero when both refs have the same POC. When
+             * td == 0 the mvs are copied unscaled in the loop below. */
+            if( td != 0 )
+            {
+                tx = (16384 + (abs(td) >> 1)) / td;
+                dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
+            }
+
+            x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, i_ref );
+
+            for( y4 = y8; y4 < y8+2; y4++ )
+                for( x4 = x8; x4 < x8+2; x4++ )
+                {
+                    const int16_t *mv_col = h->mb.list1ref0.mv[ i_mb_4x4 + x4 + y4 * 4 * h->mb.i_mb_stride ];
+                    if( td == 0 /* || pic0 is a long-term ref */ )
+                    {
+                        /* equal POC distance: use the co-located mv directly, L1 mv = 0 */
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_col[0], mv_col[1] );
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
+                    }
+                    else
+                    {
+                        /* mvL0 = scaled co-located mv; mvL1 = mvL0 - mv_col */
+                        int mv_l0[2];
+                        mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
+                        mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_l0[0], mv_l0[1] );
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
+                    }
+                }
+        }
+    }
+
+    return 1;
+}
+
+/* Spatial direct prediction: each list's direct ref is the minimum
+ * non-negative neighbour ref (left, top, top-right — or top-left when
+ * top-right is unavailable); the 16x16 mvs come from the standard mv
+ * predictor. 4x4 blocks whose co-located list1 block has ref 0 and
+ * near-zero motion are reset to zero mv (col_zero_flag).
+ * Always returns 1 (spatial prediction is always available). */
+static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
+{
+    int ref[2];
+    int mv[2][2];
+    int i_list;
+    int i8, i4;
+    const int s8x8 = 2 * h->mb.i_mb_stride;
+    const int s4x4 = 4 * h->mb.i_mb_stride;
+    const int8_t *l1ref = &h->mb.list1ref0.ref[ 2*h->mb.i_mb_x + 2*s8x8*h->mb.i_mb_y ];
+    const int16_t (*l1mv)[2] = (const int16_t (*)[2])
+        &h->mb.list1ref0.mv[ 4*h->mb.i_mb_x + 4*s4x4*h->mb.i_mb_y ];
+
+    for( i_list=0; i_list<2; i_list++ )
+    {
+        int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
+        int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
+        int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
+        /* top-right unavailable (-2 marker): fall back to top-left */
+        if( i_refc == -2 )
+            i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
+
+        /* minimum of the valid (>= 0) neighbour refs */
+        ref[i_list] = i_refa;
+        if( ref[i_list] < 0 || ( i_refb < ref[i_list] && i_refb >= 0 ))
+            ref[i_list] = i_refb;
+        if( ref[i_list] < 0 || ( i_refc < ref[i_list] && i_refc >= 0 ))
+            ref[i_list] = i_refc;
+        if( ref[i_list] < 0 )
+            ref[i_list] = -1;
+    }
+
+    if( ref[0] < 0 && ref[1] < 0 )
+    {
+        /* no valid neighbour in either list: default to ref 0, zero mv */
+        ref[0] =
+        ref[1] = 0;
+        mv[0][0] =
+        mv[0][1] =
+        mv[1][0] =
+        mv[1][1] = 0;
+    }
+    else
+    {
+        for( i_list=0; i_list<2; i_list++ )
+        {
+            if( ref[i_list] >= 0 )
+                x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] );
+            else
+                mv[i_list][0] = mv[i_list][1] = 0;
+        }
+    }
+
+    /* FIXME: clip mv ? */
+
+    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
+    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
+    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, mv[0][0], mv[0][1] );
+    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, mv[1][0], mv[1][1] );
+
+    /* col_zero_flag */
+    for( i8=0; i8<4; i8++ )
+    {
+        const int x8 = i8%2;
+        const int y8 = i8/2;
+        if( l1ref[ x8 + y8*s8x8 ] == 0 )
+        {
+            for( i4=0; i4<4; i4++ )
+            {
+                const int x4 = i4%2 + 2*x8;
+                const int y4 = i4/2 + 2*y8;
+                const int16_t *mvcol = l1mv[x4 + y4*s4x4];
+                /* co-located motion is (near) zero: force zero mv for any
+                 * list whose direct ref is 0 */
+                if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
+                {
+                    if( ref[0] == 0 )
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0, 0 );
+                    if( ref[1] == 0 )
+                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
+                }
+            }
+        }
+    }
+
+    return 1;
+}
+
+/* Fill the mv/ref caches for a B_SKIP/B_DIRECT mb, using spatial or
+ * temporal derivation depending on the slice header. On success the
+ * predicted refs/mvs are also snapshotted into cache.direct_ref /
+ * cache.direct_mv so x264_mb_load_mv_direct8x8 can restore them
+ * per-partition later. Returns 1 if direct prediction is available. */
+int x264_mb_predict_mv_direct16x16( x264_t *h )
+{
+    int b_available;
+    if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
+        return 0;
+    else if( h->sh.b_direct_spatial_mv_pred )
+        b_available = x264_mb_predict_mv_direct16x16_spatial( h );
+    else
+        b_available = x264_mb_predict_mv_direct16x16_temporal( h );
+
+    /* cache ref & mv */
+    if( b_available )
+    {
+        int i, l;
+        for( l = 0; l < 2; l++ )
+            for( i = 0; i < 4; i++ )
+                h->mb.cache.direct_ref[l][i] = h->mb.cache.ref[l][x264_scan8[i*4]];
+        memcpy(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv));
+    }
+
+    return b_available;
+}
+
+/* Restore the direct prediction saved by x264_mb_predict_mv_direct16x16
+ * for one 8x8 partition (idx = 0..3) into the active mv/ref caches. */
+void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
+{
+    const int x = 2*(idx%2);
+    const int y = 2*(idx/2);
+    int l;
+    x264_macroblock_cache_ref( h, x, y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] );
+    x264_macroblock_cache_ref( h, x, y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] );
+    for( l = 0; l < 2; l++ )
+    {
+        /* copy two rows of two int16_t[2] mvs per list, one 64-bit word per
+         * row. NOTE(review): type-puns through uint64_t — assumes the cache
+         * rows are suitably aligned; confirm on strict-alignment targets. */
+        *(uint64_t*)h->mb.cache.mv[l][x264_scan8[idx*4]] =
+        *(uint64_t*)h->mb.cache.direct_mv[l][x264_scan8[idx*4]];
+        *(uint64_t*)h->mb.cache.mv[l][x264_scan8[idx*4]+8] =
+        *(uint64_t*)h->mb.cache.direct_mv[l][x264_scan8[idx*4]+8];
+    }
+}
+
+/* This just improves encoder performance, it's not part of the spec */
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc )
{
int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
}
+/* Motion compensation for one 8x8 direct partition (x,y in 4x4 block units,
+ * both even). With direct_8x8_inference the whole 8x8 uses one mv per list;
+ * otherwise each 4x4 sub-block is compensated separately. Which lists are
+ * used is inferred from which cached refs are valid (>= 0).
+ * (Braces added to the nested if/else — the original relied on dangling-else
+ * binding; behavior is unchanged.) */
+static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
+{
+    const int i8 = x264_scan8[0] + x + 8*y;
+
+    /* FIXME: optimize based on current block size, not global settings? */
+    if( h->sps->b_direct8x8_inference )
+    {
+        if( h->mb.cache.ref[0][i8] >= 0 )
+        {
+            if( h->mb.cache.ref[1][i8] >= 0 )
+                x264_mb_mc_01xywh( h, x, y, 2, 2 );
+            else
+                x264_mb_mc_0xywh( h, x, y, 2, 2 );
+        }
+        else
+        {
+            x264_mb_mc_1xywh( h, x, y, 2, 2 );
+        }
+    }
+    else
+    {
+        if( h->mb.cache.ref[0][i8] >= 0 )
+        {
+            if( h->mb.cache.ref[1][i8] >= 0 )
+            {
+                x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
+                x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
+                x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
+                x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
+            }
+            else
+            {
+                x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 );
+                x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 );
+                x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
+                x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
+            }
+        }
+        else
+        {
+            x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
+            x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
+            x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
+            x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
+        }
+    }
+}
void x264_mb_mc( x264_t *h )
{
x264_mb_mc_0xywh( h, 2, 0, 2, 4 );
}
}
- else if( h->mb.i_type == P_8x8 )
+ else if( h->mb.i_type == P_8x8 || h->mb.i_type == B_8x8 )
{
int i;
for( i = 0; i < 4; i++ )
x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
break;
+ case D_L1_8x8:
+ x264_mb_mc_1xywh( h, x, y, 2, 2 );
+ break;
+ case D_L1_8x4:
+ x264_mb_mc_1xywh( h, x, y+0, 2, 1 );
+ x264_mb_mc_1xywh( h, x, y+1, 2, 1 );
+ break;
+ case D_L1_4x8:
+ x264_mb_mc_1xywh( h, x+0, y, 1, 2 );
+ x264_mb_mc_1xywh( h, x+1, y, 1, 2 );
+ break;
+ case D_L1_4x4:
+ x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
+ x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
+ x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
+ x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
+ break;
+ case D_BI_8x8:
+ x264_mb_mc_01xywh( h, x, y, 2, 2 );
+ break;
+ case D_BI_8x4:
+ x264_mb_mc_01xywh( h, x, y+0, 2, 1 );
+ x264_mb_mc_01xywh( h, x, y+1, 2, 1 );
+ break;
+ case D_BI_4x8:
+ x264_mb_mc_01xywh( h, x+0, y, 1, 2 );
+ x264_mb_mc_01xywh( h, x+1, y, 1, 2 );
+ break;
+ case D_BI_4x4:
+ x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
+ x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
+ x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
+ x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
+ break;
+ case D_DIRECT_8x8:
+ x264_mb_mc_direct8x8( h, x, y );
+ break;
}
}
}
- else if( h->mb.i_type == B_8x8 || h->mb.i_type == B_DIRECT )
+ else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT )
{
- x264_log( h, X264_LOG_ERROR, "mc_luma with unsupported mb\n" );
- return;
+ int i;
+ for( i = 0; i < 4; i++ )
+ {
+ const int x = 2*(i%2);
+ const int y = 2*(i/2);
+ x264_mb_mc_direct8x8( h, x, y );
+ }
}
else /* B_*x* */
{
void x264_macroblock_cache_init( x264_t *h )
{
int i, j;
- int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
+ int i_mb_count = h->mb.i_mb_count;
h->mb.i_mb_stride = h->sps->i_mb_width;
h->mb.type= x264_malloc( i_mb_count * sizeof( int8_t) );
h->mb.qp = x264_malloc( i_mb_count * sizeof( int8_t) );
h->mb.cbp = x264_malloc( i_mb_count * sizeof( int16_t) );
+ h->mb.skipbp = x264_malloc( i_mb_count * sizeof( int8_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
h->mb.intra4x4_pred_mode = x264_malloc( i_mb_count * 7 * sizeof( int8_t ) );
for( j=0; j<16; j++ ) /* FIXME: alloc no more than param.i_frame_reference */
h->mb.mvr[i][j] = x264_malloc( 2 * i_mb_count * sizeof( int16_t ) );
+ h->mb.list1ref0.ref = NULL;
+ h->mb.list1ref0.mv = NULL;
+ if( h->param.i_bframe )
+ {
+ h->mb.list1ref0.ref = x264_malloc( 4 * i_mb_count * sizeof( int8_t ) );
+ h->mb.list1ref0.mv = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) );
+ }
+
/* init with not avaiable (for top right idx=7,15) */
memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
x264_free( h->mb.mvd[0] );
x264_free( h->mb.mvd[1] );
}
+ if( h->param.i_bframe )
+ {
+ x264_free( h->mb.list1ref0.ref );
+ x264_free( h->mb.list1ref0.mv );
+ }
x264_free( h->mb.mv[0] );
x264_free( h->mb.mv[1] );
x264_free( h->mb.ref[0] );
x264_free( h->mb.ref[1] );
x264_free( h->mb.intra4x4_pred_mode );
x264_free( h->mb.non_zero_count );
+ x264_free( h->mb.skipbp );
x264_free( h->mb.cbp );
x264_free( h->mb.qp );
x264_free( h->mb.type );
}
}
}
+
+ /* load skip */
+ if( h->param.b_cabac )
+ {
+ if( h->sh.i_type == SLICE_TYPE_B )
+ {
+ memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );
+ if( i_left_xy >= 0 )
+ {
+ h->mb.cache.skip[x264_scan8[0] - 1] = h->mb.skipbp[i_left_xy] & 0x2;
+ h->mb.cache.skip[x264_scan8[8] - 1] = h->mb.skipbp[i_left_xy] & 0x8;
+ }
+ if( i_top_xy >= 0 )
+ {
+ h->mb.cache.skip[x264_scan8[0] - 8] = h->mb.skipbp[i_top_xy] & 0x4;
+ h->mb.cache.skip[x264_scan8[4] - 8] = h->mb.skipbp[i_top_xy] & 0x8;
+ }
+ }
+ else if( h->mb.i_mb_xy == 0 && h->sh.i_type == SLICE_TYPE_P )
+ {
+ memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );
+ }
+ }
}
}
else
h->mb.chroma_pred_mode[i_mb_xy] = I_PRED_CHROMA_DC;
- if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) )
+ if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) )
{
int i_list;
for( i_list = 0; i_list < 2; i_list++ )
}
}
}
+ if( h->sh.i_type == SLICE_TYPE_B )
+ {
+ if( i_mb_type == B_SKIP || i_mb_type == B_DIRECT )
+ h->mb.skipbp[i_mb_xy] = 0xf;
+ else if( i_mb_type == B_8x8 )
+ {
+ int skipbp = 0;
+ for( i = 0; i < 4; i++ )
+ skipbp |= ( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) << i;
+ h->mb.skipbp[i_mb_xy] = skipbp;
+ }
+ else
+ h->mb.skipbp[i_mb_xy] = 0;
+ }
}
}
+/* Snapshot the just-encoded frame's L0 refs and mvs into list1ref0 so a
+ * future B-frame can read them as its co-located (list1 ref0) data for
+ * direct prediction. Sizes match the x264_macroblock_cache_init allocs. */
+void x264_macroblock_direct_ref_save( x264_t *h )
+{
+    /* Manipulation of ref numbers is unnecessary unless we allow
+     * ref list reordering, multiple B-frame delay, or B-frames as refs. */
+    memcpy( h->mb.list1ref0.ref, h->mb.ref[0], 4 * h->mb.i_mb_count * sizeof( int8_t ) );
+    memcpy( h->mb.list1ref0.mv, h->mb.mv[0], 2*16 * h->mb.i_mb_count * sizeof( int16_t ) );
+}
/* XXX mb_type isn't the one written in the bitstream -> only internal usage */
#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_16x16 )
#define IS_SKIP(type) ( (type) == P_SKIP || (type) == B_SKIP )
+#define IS_DIRECT(type) ( (type) == B_DIRECT )
enum mb_class_e
{
I_4x4 = 0,
D_16x16 = 16,
};
+/* Indexed [list][sub-partition type]: 1 if the sub-partition carries
+ * explicit motion in that list (BI counts for both, DIRECT for neither). */
+static const int x264_mb_partition_listX_table[2][17] =
+{{
+    1, 1, 1, 1, /* D_L0_* */
+    0, 0, 0, 0, /* D_L1_* */
+    1, 1, 1, 1, /* D_BI_* */
+    0,          /* D_DIRECT_8x8 */
+    0, 0, 0, 0  /* 8x8 .. 16x16 */
+},
+{
+    0, 0, 0, 0, /* D_L0_* */
+    1, 1, 1, 1, /* D_L1_* */
+    1, 1, 1, 1, /* D_BI_* */
+    0,          /* D_DIRECT_8x8 */
+    0, 0, 0, 0  /* 8x8 .. 16x16 */
+}};
static const int x264_mb_partition_count_table[17] =
{
/* sub L0 */
void x264_macroblock_cache_save( x264_t *h );
void x264_macroblock_cache_end( x264_t *h );
+void x264_macroblock_direct_ref_save( x264_t *h );
+
void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int i_qscale );
void x264_mb_dequant_2x2_dc( int16_t dct[2][2], int i_qscale );
void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale );
* h->mb. need only valid values from other blocks */
void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] );
/* x264_mb_predict_mv:
- * set mvp with predicted mv for all blocks except P_SKIP
+ * set mvp with predicted mv for all blocks except SKIP and DIRECT
* h->mb. need valid ref/partition/sub of current block to be valid
* and valid mv/ref from other blocks . */
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] );
+/* x264_mb_predict_mv_direct16x16:
+ * set h->mb.cache.mv and h->mb.cache.ref for B_SKIP or B_DIRECT
+ * h->mb. need only valid values from other blocks
+ * return 1 on success, 0 on failure */
+int x264_mb_predict_mv_direct16x16( x264_t *h );
+/* x264_mb_load_mv_direct8x8:
+ * set h->mb.cache.mv and h->mb.cache.ref for B_DIRECT
+ * must be called only after x264_mb_predict_mv_direct16x16 */
+void x264_mb_load_mv_direct8x8( x264_t *h, int idx );
/* x264_mb_predict_mv_ref16x16:
* set mvc with D_16x16 prediction.
* uses all neighbors, even those that didn't end up using this ref.
- * need only valid values from other blocks */
+ * h->mb. need only valid values from other blocks */
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc );
}
}
}
+/* Mark a width x height region (4x4-block units at offset x,y) of the scan8
+ * skip cache as skipped (b_skip = 1) or not. Read by CABAC context derivation. */
+static inline void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
+{
+    int dy, dx;
+    for( dy = 0; dy < height; dy++ )
+    {
+        for( dx = 0; dx < width; dx++ )
+        {
+            h->mb.cache.skip[X264_SCAN8_0+x+dx+8*(y+dy)] = b_skip;
+        }
+    }
+}
#endif
x264_mb_analysis_list_t l1;
int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
+ int i_cost16x16direct;
+ int i_cost8x8bi;
+ int i_cost8x8direct[4];
+
+ int b_direct_available;
} x264_mb_analysis_t;
a->l1.i_cost4x4[i] = -1;
a->l1.i_cost8x4[i] = -1;
a->l1.i_cost4x8[i] = -1;
+ a->i_cost8x8direct[i] = -1;
}
a->l1.i_cost16x8 = -1;
a->l1.i_cost8x16 = -1;
a->i_cost16x16bi = -1;
+ a->i_cost16x16direct = -1;
+ a->i_cost8x8bi = -1;
}
}
}
a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost;
}
+/* Score DIRECT prediction: SATD between the source mb and the reconstruction
+ * already motion-compensated into fdec, per 8x8 block (i_cost8x8direct[i])
+ * and summed into i_cost16x16direct. */
+static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
+{
+    /* Assumes that fdec still contains the results of
+     * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
+
+    uint8_t *p_fenc = h->mb.pic.p_fenc[0];
+    uint8_t *p_fdec = h->mb.pic.p_fdec[0];
+    int i_stride= h->mb.pic.i_stride[0];
+    int i;
+
+    a->i_cost16x16direct = 0;
+    for( i = 0; i < 4; i++ )
+    {
+        const int x8 = i%2;
+        const int y8 = i/2;
+        const int off = 8 * x8 + 8 * i_stride * y8;
+        a->i_cost16x16direct +=
+        a->i_cost8x8direct[i] =
+            h->pixf.satd[PIXEL_8x8]( &p_fenc[off], i_stride, &p_fdec[off], i_stride );
+    }
+}
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
}
+/* Analyse an 8x8-partitioned B mb: for each 8x8 block, motion-search L0 and
+ * L1, derive a BI cost, compare against the DIRECT cost (precomputed by
+ * x264_mb_analyse_inter_direct), keep the cheapest sub-partition, and load
+ * the winning refs/mvs into the caches — later partitions' mv prediction
+ * depends on earlier ones, so the cache updates must happen in-loop. */
+static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
+{
+    uint8_t pix[2][8*8];
+    uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
+                           h->mb.pic.p_fref[1][a->l1.i_ref][0] };
+    uint8_t *p_fenc = h->mb.pic.p_fenc[0];
+    int mvc[2][5][2], i_mvc[2];
+    int i, j;
+
+    /* XXX Needed for x264_mb_predict_mv */
+    h->mb.i_partition = D_8x8;
+
+    a->i_cost8x8bi = 0;
+
+    /* seed each list's mv candidates with its 16x16 search result */
+    i_mvc[0] = i_mvc[1] = 1;
+    mvc[0][0][0] = a->l0.me16x16.mv[0];
+    mvc[0][0][1] = a->l0.me16x16.mv[1];
+    mvc[1][0][0] = a->l1.me16x16.mv[0];
+    mvc[1][0][1] = a->l1.me16x16.mv[1];
+
+
+    for( i = 0; i < 4; i++ )
+    {
+        const int x8 = i%2;
+        const int y8 = i/2;
+        uint8_t *p_fenc_i = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
+        int i_part_cost;
+        int i_part_cost_bi = 0;
+
+        /* search L0 (j=0) then L1 (j=1) for this 8x8 block */
+        for( j = 0; j < 2; j++ )
+        {
+            x264_mb_analysis_list_t *l = j ? &a->l1 : &a->l0;
+            x264_me_t *m = &l->me8x8[i];
+
+            m->i_pixel = PIXEL_8x8;
+            m->lm = a->i_lambda;
+
+            m->p_fenc = p_fenc_i;
+            m->p_fref = &p_fref[j][8*(y8*h->mb.pic.i_stride[0]+x8)];
+            m->i_stride = h->mb.pic.i_stride[0];
+            m->i_mv_range = a->i_mv_range;
+
+            x264_mb_predict_mv( h, j, 4*i, 2, m->mvp );
+            x264_me_search( h, m, mvc[j], i_mvc[j] );
+
+            x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, j, m->mv[0], m->mv[1] );
+            /* NOTE(review): assumes l->i_cost8x8 was reset by the caller — confirm */
+            l->i_cost8x8 += m->cost;
+
+            /* BI mode */
+            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[j], 8,
+                            m->mv[0], m->mv[1], 8, 8 );
+            /* FIXME: add ref cost */
+            i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
+                                              bs_size_se( m->mv[1] - m->mvp[1] ) );
+        }
+
+        /* BI distortion: average of the two list predictions vs. source */
+        h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 );
+        i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
+
+        /* pick the cheapest of L0 / L1 / BI / DIRECT (DIRECT cost is -1 if
+         * direct prediction was unavailable, hence the >= 0 check) */
+        i_part_cost = a->l0.me8x8[i].cost;
+        h->mb.i_sub_partition[i] = D_L0_8x8;
+        if( a->l1.me8x8[i].cost < i_part_cost )
+        {
+            i_part_cost = a->l1.me8x8[i].cost;
+            h->mb.i_sub_partition[i] = D_L1_8x8;
+        }
+        if( i_part_cost_bi < i_part_cost )
+        {
+            i_part_cost = i_part_cost_bi;
+            h->mb.i_sub_partition[i] = D_BI_8x8;
+        }
+        if( a->i_cost8x8direct[i] < i_part_cost && a->i_cost8x8direct[i] >= 0)
+        {
+            i_part_cost = a->i_cost8x8direct[i];
+            h->mb.i_sub_partition[i] = D_DIRECT_8x8;
+        }
+        a->i_cost8x8bi += i_part_cost;
+
+        /* XXX Needed for x264_mb_predict_mv */
+        if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
+        {
+            x264_mb_load_mv_direct8x8( h, i );
+            x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 );
+            x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 );
+            x264_macroblock_cache_skip( h, 2*x8, 2*y8, 2, 2, 1 );
+        }
+        else
+        {
+            /* unused list gets ref -1 / zero mv so neighbour prediction stays consistent */
+            if( h->mb.i_sub_partition[i] == D_L1_8x8 )
+            {
+                x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, -1 );
+                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 );
+                x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 );
+            }
+            else
+            {
+                x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, a->l0.i_ref );
+                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, a->l0.me8x8[i].mv[0], a->l0.me8x8[i].mv[1] );
+            }
+
+            if( h->mb.i_sub_partition[i] == D_L0_8x8 )
+            {
+                x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 1, -1 );
+                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 );
+                x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 );
+            }
+            else
+            {
+                x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 1, a->l1.i_ref );
+                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, a->l1.me8x8[i].mv[0], a->l1.me8x8[i].mv[1] );
+            }
+        }
+    }
+}
+
/*****************************************************************************
* x264_macroblock_analyse:
*****************************************************************************/
}
else if( h->sh.i_type == SLICE_TYPE_B )
{
+ const unsigned int i_neighbour = h->mb.i_neighbour;
+ const unsigned int flags = h->param.analyse.inter;
+ int b_skip = 0;
int i_cost;
- /* best inter mode */
- x264_mb_analyse_inter_b16x16( h, &analysis );
- h->mb.i_type = B_L0_L0;
- h->mb.i_partition = D_16x16;
- i_cost = analysis.l0.me16x16.cost;
-
- if( analysis.l1.me16x16.cost < i_cost )
+ analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
+ if( analysis.b_direct_available )
{
- h->mb.i_type = B_L1_L1;
- i_cost = analysis.l1.me16x16.cost;
- }
- if( analysis.i_cost16x16bi < i_cost )
- {
- h->mb.i_type = B_BI_BI;
- i_cost = analysis.i_cost16x16bi;
- }
+ h->mb.i_type = B_SKIP;
+ x264_mb_mc( h );
- /* best intra mode */
- x264_mb_analyse_intra( h, &analysis );
- if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
- {
- h->mb.i_type = I_16x16;
- i_cost = analysis.i_sad_i16x16;
+ /* Conditioning the probe on neighboring block types
+ * doesn't seem to help speed or quality. */
+ b_skip = x264_macroblock_probe_bskip( h );
}
- if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
+
+ if( !b_skip )
{
- h->mb.i_type = I_4x4;
- i_cost = analysis.i_sad_i4x4;
+ /* best inter mode */
+ /* direct must be first */
+ if( analysis.b_direct_available )
+ x264_mb_analyse_inter_direct( h, &analysis );
+
+ x264_mb_analyse_inter_b16x16( h, &analysis );
+
+ /* 8x8 must be last */
+ if( flags & X264_ANALYSE_BSUB16x16 )
+ x264_mb_analyse_inter_b8x8( h, &analysis );
+
+ h->mb.i_type = B_L0_L0;
+ h->mb.i_partition = D_16x16;
+ i_cost = analysis.l0.me16x16.cost;
+ if( analysis.l1.me16x16.cost < i_cost )
+ {
+ h->mb.i_type = B_L1_L1;
+ i_cost = analysis.l1.me16x16.cost;
+ }
+ if( analysis.i_cost16x16bi < i_cost )
+ {
+ h->mb.i_type = B_BI_BI;
+ i_cost = analysis.i_cost16x16bi;
+ }
+ if( analysis.i_cost16x16direct < i_cost && analysis.i_cost16x16direct >= 0 )
+ {
+ h->mb.i_type = B_DIRECT;
+ i_cost = analysis.i_cost16x16direct;
+ }
+ if( analysis.i_cost8x8bi < i_cost && analysis.i_cost8x8bi >= 0 )
+ {
+ h->mb.i_type = B_8x8;
+ h->mb.i_partition = D_8x8;
+ i_cost = analysis.i_cost8x8bi;
+ }
+
+ /* refine qpel */
+ if( h->mb.i_partition == D_16x16 )
+ {
+ if( h->mb.i_type == B_L0_L0 )
+ {
+ x264_me_refine_qpel( h, &analysis.l0.me16x16 );
+ i_cost = analysis.l0.me16x16.cost;
+ }
+ else if( h->mb.i_type == B_L1_L1 )
+ {
+ x264_me_refine_qpel( h, &analysis.l1.me16x16 );
+ i_cost = analysis.l1.me16x16.cost;
+ }
+ }
+ /* TODO: refine bidir, 8x8 */
+
+ /* best intra mode */
+ x264_mb_analyse_intra( h, &analysis );
+ if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost )
+ {
+ h->mb.i_type = I_16x16;
+ i_cost = analysis.i_sad_i16x16;
+ }
+ if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost )
+ {
+ h->mb.i_type = I_4x4;
+ i_cost = analysis.i_sad_i4x4;
+ }
}
}
-#undef BEST_TYPE
/*-------------------- Update MB from the analysis ----------------------*/
h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
break;
}
+ case B_SKIP:
+ case B_DIRECT:
+ /* probably unnecessary for B_SKIP */
+ x264_mb_load_mv_direct8x8( h, 0 );
+ x264_mb_load_mv_direct8x8( h, 1 );
+ x264_mb_load_mv_direct8x8( h, 2 );
+ x264_mb_load_mv_direct8x8( h, 3 );
+ break;
+
case B_L0_L0:
switch( h->mb.i_partition )
{
break;
}
break;
+ case B_8x8:
+ /* nothing to do: caches were updated during analysis */
+ break;
default:
fprintf( stderr, "internal error (invalid MB type)\n" );
int i_ref = h->mb.cache.ref[i_list][i8];
int ctx = 0;
- if( i_refa > 0 )
+ if( i_refa > 0 && !h->mb.cache.skip[i8 - 1])
ctx++;
- if( i_refb > 0 )
+ if( i_refb > 0 && !h->mb.cache.skip[i8 - 8])
ctx += 2;
while( i_ref > 0 )
x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mdx, mdy );
}
+/* Write the CABAC mvds for every sub-partition of an 8x8-partitioned mb
+ * that carries motion in i_list; DIRECT and opposite-list-only partitions
+ * are skipped via x264_mb_partition_listX_table. */
+static inline void x264_cabac_mb8x8_mvd( x264_t *h, int i_list )
+{
+    int i;
+    for( i = 0; i < 4; i++ )
+    {
+        if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
+        {
+            continue;
+        }
+
+        switch( h->mb.i_sub_partition[i] )
+        {
+            case D_L0_8x8:
+            case D_L1_8x8:
+            case D_BI_8x8:
+                x264_cabac_mb_mvd( h, i_list, 4*i, 2, 2 );
+                break;
+            case D_L0_8x4:
+            case D_L1_8x4:
+            case D_BI_8x4:
+                x264_cabac_mb_mvd( h, i_list, 4*i+0, 2, 1 );
+                x264_cabac_mb_mvd( h, i_list, 4*i+2, 2, 1 );
+                break;
+            case D_L0_4x8:
+            case D_L1_4x8:
+            case D_BI_4x8:
+                x264_cabac_mb_mvd( h, i_list, 4*i+0, 1, 2 );
+                x264_cabac_mb_mvd( h, i_list, 4*i+1, 1, 2 );
+                break;
+            case D_L0_4x4:
+            case D_L1_4x4:
+            case D_BI_4x4:
+                x264_cabac_mb_mvd( h, i_list, 4*i+0, 1, 1 );
+                x264_cabac_mb_mvd( h, i_list, 4*i+1, 1, 1 );
+                x264_cabac_mb_mvd( h, i_list, 4*i+2, 1, 1 );
+                x264_cabac_mb_mvd( h, i_list, 4*i+3, 1, 1 );
+                break;
+        }
+    }
+}
+
static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
{
/* TODO: clean up/optimize */
const int i_mb_pos_start = bs_pos( s );
int i_mb_pos_tex;
+ int i_list;
int i;
/* Write the MB type */
x264_cabac_mb_ref( h, 0, 12 );
}
- for( i = 0; i < 4; i++ )
+ x264_cabac_mb8x8_mvd( h, 0 );
+ }
+ else if( i_mb_type == B_8x8 )
+ {
+ /* sub mb type */
+ x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[0] );
+ x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[1] );
+ x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[2] );
+ x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[3] );
+
+ /* ref */
+ for( i_list = 0; i_list < 2; i_list++ )
{
- switch( h->mb.i_sub_partition[i] )
+ if( ( i_list ? h->sh.i_num_ref_idx_l1_active : h->sh.i_num_ref_idx_l0_active ) == 1 )
+ continue;
+ for( i = 0; i < 4; i++ )
{
- case D_L0_8x8:
- x264_cabac_mb_mvd( h, 0, 4*i, 2, 2 );
- break;
- case D_L0_8x4:
- x264_cabac_mb_mvd( h, 0, 4*i+0, 2, 1 );
- x264_cabac_mb_mvd( h, 0, 4*i+2, 2, 1 );
- break;
- case D_L0_4x8:
- x264_cabac_mb_mvd( h, 0, 4*i+0, 1, 2 );
- x264_cabac_mb_mvd( h, 0, 4*i+1, 1, 2 );
- break;
- case D_L0_4x4:
- x264_cabac_mb_mvd( h, 0, 4*i+0, 1, 1 );
- x264_cabac_mb_mvd( h, 0, 4*i+1, 1, 1 );
- x264_cabac_mb_mvd( h, 0, 4*i+2, 1, 1 );
- x264_cabac_mb_mvd( h, 0, 4*i+3, 1, 1 );
- break;
+ if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
+ {
+ x264_cabac_mb_ref( h, i_list, 4*i );
+ }
}
}
- }
- else if( i_mb_type == B_8x8 )
- {
- /* TODO */
- fprintf( stderr, "Arggg B_8x8\n" );
- return;
+
+ x264_cabac_mb8x8_mvd( h, 0 );
+ x264_cabac_mb8x8_mvd( h, 1 );
}
else if( i_mb_type != B_DIRECT )
{
/* All B mode */
- int i_list;
int b_list[2][2];
/* init ref list utilisations */
1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
};
+static const uint8_t sub_mb_type_p_to_golomb[4]=
+{ 3, 1, 2, 0 };
+static const uint8_t sub_mb_type_b_to_golomb[13]=
+{ 10, 4, 5, 1, 11, 6, 7, 2, 12, 8, 9, 3, 0 };
static const uint8_t block_idx_x[16] =
{
}
}
+/* Write CAVLC mvds (se-coded mv minus predictor) for every sub-partition of
+ * an 8x8-partitioned mb that carries motion in i_list. Must run after the
+ * caches hold the final mvs, since the predictor reads neighbouring blocks. */
+static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list )
+{
+    int i;
+    for( i = 0; i < 4; i++ )
+    {
+        int mvp[2];
+
+        /* skip partitions with no motion in this list (DIRECT, other list) */
+        if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
+        {
+            continue;
+        }
+
+        switch( h->mb.i_sub_partition[i] )
+        {
+            case D_L0_8x8:
+            case D_L1_8x8:
+            case D_BI_8x8:
+                x264_mb_predict_mv( h, i_list, 4*i, 2, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
+                break;
+            case D_L0_8x4:
+            case D_L1_8x4:
+            case D_BI_8x4:
+                x264_mb_predict_mv( h, i_list, 4*i+0, 2, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
+
+                x264_mb_predict_mv( h, i_list, 4*i+2, 2, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] );
+                break;
+            case D_L0_4x8:
+            case D_L1_4x8:
+            case D_BI_4x8:
+                x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
+
+                x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] );
+                break;
+            case D_L0_4x4:
+            case D_L1_4x4:
+            case D_BI_4x4:
+                x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] );
+
+                x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] );
+
+                x264_mb_predict_mv( h, i_list, 4*i+2, 1, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] );
+
+                x264_mb_predict_mv( h, i_list, 4*i+3, 1, mvp );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][0] - mvp[0] );
+                bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][1] - mvp[1] );
+                break;
+        }
+    }
+}
+
/*****************************************************************************
* x264_macroblock_write:
*****************************************************************************/
/* sub mb type */
for( i = 0; i < 4; i++ )
{
- switch( h->mb.i_sub_partition[i] )
- {
- case D_L0_8x8:
- bs_write_ue( s, 0 );
- break;
- case D_L0_8x4:
- bs_write_ue( s, 1 );
- break;
- case D_L0_4x8:
- bs_write_ue( s, 2 );
- break;
- case D_L0_4x4:
- bs_write_ue( s, 3 );
- break;
- }
+ bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
}
/* ref0 */
if( h->sh.i_num_ref_idx_l0_active > 1 && b_sub_ref0 )
bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[8]] );
bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[12]] );
}
+
+ x264_sub_mb_mv_write_cavlc( h, s, 0 );
+ }
+ else if( i_mb_type == B_8x8 )
+ {
+ bs_write_ue( s, 22 );
+
+ /* sub mb type */
+ for( i = 0; i < 4; i++ )
+ {
+ bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
+ }
+ /* ref */
for( i = 0; i < 4; i++ )
{
- int mvp[2];
-
- switch( h->mb.i_sub_partition[i] )
+ if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
{
- case D_L0_8x8:
- x264_mb_predict_mv( h, 0, 4*i, 2, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] );
- break;
- case D_L0_8x4:
- x264_mb_predict_mv( h, 0, 4*i+0, 2, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] );
-
- x264_mb_predict_mv( h, 0, 4*i+2, 2, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][1] - mvp[1] );
- break;
- case D_L0_4x8:
- x264_mb_predict_mv( h, 0, 4*i+0, 1, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] );
-
- x264_mb_predict_mv( h, 0, 4*i+1, 1, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][1] - mvp[1] );
- break;
- case D_L0_4x4:
- x264_mb_predict_mv( h, 0, 4*i+0, 1, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] );
-
- x264_mb_predict_mv( h, 0, 4*i+1, 1, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][1] - mvp[1] );
-
- x264_mb_predict_mv( h, 0, 4*i+2, 1, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][1] - mvp[1] );
-
- x264_mb_predict_mv( h, 0, 4*i+3, 1, mvp );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+3]][0] - mvp[0] );
- bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+3]][1] - mvp[1] );
- break;
+ bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
}
}
- }
- else if( i_mb_type == B_8x8 )
- {
- fprintf( stderr, "invalid/unhandled mb_type (B_8x8)\n" );
- return;
+ for( i = 0; i < 4; i++ )
+ {
+ if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
+ {
+ bs_write_te( s, h->sh.i_num_ref_idx_l1_active - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
+ }
+ }
+ /* mvd */
+ x264_sub_mb_mv_write_cavlc( h, s, 0 );
+ x264_sub_mb_mv_write_cavlc( h, s, 1 );
}
else if( i_mb_type != B_DIRECT )
{
sh->i_redundant_pic_cnt = 0;
- sh->b_direct_spatial_mv_pred = 1;
+ sh->b_direct_spatial_mv_pred = ( param->analyse.i_direct_mv_pred == X264_DIRECT_PRED_SPATIAL );
sh->b_num_ref_idx_override = 0;
sh->i_num_ref_idx_l0_active = 1;
h->pps = &h->pps_array[0];
x264_pps_init( h->pps, 0, &h->param, h->sps);
+
+ h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
/* Init frames. */
for( i = 0; i < X264_BFRAME_MAX + 1; i++ )
{
int i;
+ /* save mvs for B-frame prediction */
+ if( h->param.i_bframe )
+ {
+ x264_macroblock_direct_ref_save( h );
+ }
+
/* apply deblocking filter to the current decoded picture */
if( h->param.b_deblocking_filter )
{
h->i_frame_num--;
/* Do IDR if needed and if we can (won't work with B frames) */
- if( h->frames.next[0] == NULL &&
+ if( h->frames.current[0] == NULL &&
h->frames.i_last_idr + 1 >= h->param.i_idrframe )
{
/* Reset */
}
}
+static void x264_macroblock_encode_skip( x264_t *h )
+{
+ int i;
+ h->mb.i_cbp_luma = 0x00;
+ h->mb.i_cbp_chroma = 0x00;
+
+ for( i = 0; i < 16+8; i++ )
+ {
+ h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
+ }
+
+ /* store cbp */
+ h->mb.cbp[h->mb.i_mb_xy] = 0;
+}
+
/*****************************************************************************
* x264_macroblock_encode_pskip:
* Encode an already marked skip block
{
const int mvx = h->mb.cache.mv[0][x264_scan8[0]][0];
const int mvy = h->mb.cache.mv[0][x264_scan8[0]][1];
- int i;
/* Motion compensation XXX probably unneeded */
h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
mvx, mvy, 8, 8 );
- h->mb.i_cbp_luma = 0x00;
- h->mb.i_cbp_chroma = 0x00;
-
- for( i = 0; i < 16+8; i++ )
- {
- h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
- }
-
- /* store cbp */
- h->mb.cbp[h->mb.i_mb_xy] = 0;
+ x264_macroblock_encode_skip( h );
}
/*****************************************************************************
x264_macroblock_encode_pskip( h );
return;
}
+ if( h->mb.i_type == B_SKIP )
+ {
+ /* XXX motion compensation is probably unneeded */
+ x264_mb_mc( h );
+ x264_macroblock_encode_skip( h );
+ return;
+ }
/* quantification scale */
i_qscale = h->mb.qp[h->mb.i_mb_xy];
}
}
}
+
+ /* Check for B_SKIP */
+ if( h->mb.i_type == B_DIRECT &&
+ h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma == 0x00 )
+ {
+ h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = B_SKIP;
+ h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
+ }
}
/*****************************************************************************
- * x264_macroblock_probe_pskip:
- * Check if the current MB could be encoded as a P_SKIP (it supposes you use
+ * x264_macroblock_probe_skip:
+ * Check if the current MB could be encoded as a [PB]_SKIP (it assumes you use
* the previous QP
*****************************************************************************/
-int x264_macroblock_probe_pskip( x264_t *h )
+int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
{
DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
int i8x8, i4x4;
int i_decimate_mb;
- /* quantification scale */
+ /* quantization scale */
i_qp = h->mb.qp[h->mb.i_mb_xy];
- /* Get the MV */
- x264_mb_predict_mv_pskip( h, mvp );
+ if( !b_bidir )
+ {
+ /* Get the MV */
+ x264_mb_predict_mv_pskip( h, mvp );
- /* Special case, need to clip the vector */
- n = 16 * h->mb.i_mb_x + mvp[0];
- if( n < -24 )
- mvp[0] = -24 - 16*h->mb.i_mb_x;
- else if( n > 16 * h->sps->i_mb_width + 24 )
- mvp[0] = 16 * ( h->sps->i_mb_width - h->mb.i_mb_x ) + 24;
+ /* Special case, need to clip the vector */
+ n = 16 * h->mb.i_mb_x + mvp[0];
+ if( n < -24 )
+ mvp[0] = -24 - 16*h->mb.i_mb_x;
+ else if( n > 16 * h->sps->i_mb_width + 24 )
+ mvp[0] = 16 * ( h->sps->i_mb_width - h->mb.i_mb_x ) + 24;
- n = 16 * h->mb.i_mb_y + mvp[1];
- if( n < -24 )
- mvp[1] = -24 - 16*h->mb.i_mb_y;
- else if( n > 16 * h->sps->i_mb_height + 8 )
- mvp[1] = 16 * ( h->sps->i_mb_height - h->mb.i_mb_y ) + 8;
+ n = 16 * h->mb.i_mb_y + mvp[1];
+ if( n < -24 )
+ mvp[1] = -24 - 16*h->mb.i_mb_y;
+ else if( n > 16 * h->sps->i_mb_height + 8 )
+ mvp[1] = 16 * ( h->sps->i_mb_height - h->mb.i_mb_y ) + 8;
- /* Motion compensation */
- h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
- h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
- mvp[0], mvp[1], 16, 16 );
+ /* Motion compensation */
+ h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
+ h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
+ mvp[0], mvp[1], 16, 16 );
+ }
/* get luma diff */
h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
- h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride,
- h->mb.pic.p_fdec[1+ch], i_stride,
- mvp[0], mvp[1], 8, 8 );
+ if( !b_bidir )
+ {
+ h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride,
+ h->mb.pic.p_fdec[1+ch], i_stride,
+ mvp[0], mvp[1], 8, 8 );
+ }
h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
#include "../common/macroblock.h"
-int x264_macroblock_probe_pskip( x264_t *h );
+int x264_macroblock_probe_skip( x264_t *h, int b_bidir );
+
+static inline int x264_macroblock_probe_pskip( x264_t *h )
+ { return x264_macroblock_probe_skip( h, 0 ); }
+static inline int x264_macroblock_probe_bskip( x264_t *h )
+ { return x264_macroblock_probe_skip( h, 1 ); }
void x264_macroblock_encode ( x264_t *h );
void x264_macroblock_write_cabac ( x264_t *h, bs_t *s );
rc->gop_size = h->param.i_iframe;
rc->bitrate = h->param.rc.i_bitrate * 1000;
- rc->nmb = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16);
+ rc->nmb = h->mb.i_mb_count;
rc->qp = h->param.rc.i_qp_constant;
rc->qpa = rc->qp;
sps->b_frame_mbs_only = 1;
sps->b_mb_adaptive_frame_field = 0;
sps->b_direct8x8_inference = 0;
- if( sps->b_frame_mbs_only == 0 )
+ if( sps->b_frame_mbs_only == 0 ||
+ !(param->analyse.inter & X264_ANALYSE_PSUB8x8) )
{
sps->b_direct8x8_inference = 1;
}
#include <stdarg.h>
-#define X264_BUILD 0x000c
+#define X264_BUILD 0x000d
/* x264_t:
* opaque handler for decoder and encoder */
#define X264_ANALYSE_I4x4 0x0001 /* Analyse i4x4 */
#define X264_ANALYSE_PSUB16x16 0x0010 /* Analyse p16x8, p8x16 and p8x8 */
#define X264_ANALYSE_PSUB8x8 0x0020 /* Analyse p8x4, p4x8, p4x4 */
+#define X264_ANALYSE_BSUB16x16 0x0100 /* Analyse b16x8, b8x16 and b8x8 */
+#define X264_DIRECT_PRED_NONE 0
+#define X264_DIRECT_PRED_TEMPORAL 1
+#define X264_DIRECT_PRED_SPATIAL 2
/* Colorspace type
*/
unsigned int intra; /* intra flags */
unsigned int inter; /* inter flags */
+ int i_direct_mv_pred; /* spatial vs temporal mv prediction */
+
int i_subpel_refine; /* subpixel motion estimation quality */
int b_psnr; /* Do we compute PSNR stats (save a few % of cpu) */