From: Loren Merritt Date: Fri, 17 Dec 2004 10:57:02 +0000 (+0000) Subject: implement macroblock types B_SKIP, B_DIRECT, B_8x8 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=199ff7406b76dc1c10b756053398bf8a834bcf5c;p=libx264 implement macroblock types B_SKIP, B_DIRECT, B_8x8 git-svn-id: svn://svn.videolan.org/x264/trunk@68 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/common/common.c b/common/common.c index 1d64d2ed..243396fa 100644 --- a/common/common.c +++ b/common/common.c @@ -79,7 +79,7 @@ void x264_param_default( x264_param_t *param ) param->rc.i_qp_max = 51; param->rc.i_qp_step = 4; param->rc.f_ip_factor = 1.4; - param->rc.f_pb_factor = 1.4; + param->rc.f_pb_factor = 1.3; param->rc.b_stat_write = 0; param->rc.psz_stat_out = "x264_2pass.log"; @@ -97,7 +97,8 @@ void x264_param_default( x264_param_t *param ) /* */ param->analyse.intra = X264_ANALYSE_I4x4; - param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16; + param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16; + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL; param->analyse.i_subpel_refine = 1; param->analyse.b_psnr = 1; } diff --git a/common/common.h b/common/common.h index 1aacd57d..c60fdfdd 100644 --- a/common/common.h +++ b/common/common.h @@ -250,6 +250,8 @@ struct x264_t /* MB table and cache for current frame/mb */ struct { + int i_mb_count; /* number of mbs in a frame */ + /* Strides */ int i_mb_stride; @@ -270,7 +272,15 @@ struct x264_t int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */ int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */ int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */ - int16_t (*mvr[2][16])[2]; /* mb mv for each possible ref */ + int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */ + int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. 
B-frames + cabac only */ + + /* for B_SKIP and B_DIRECT motion prediction */ + struct + { + int16_t (*mv)[2]; /* keep only L0 */ + int8_t *ref; + } list1ref0; /* current value */ int i_type; @@ -313,6 +323,12 @@ struct x264_t /* 0 if non avaible */ int16_t mv[2][X264_SCAN8_SIZE][2]; int16_t mvd[2][X264_SCAN8_SIZE][2]; + + /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */ + int8_t skip[X264_SCAN8_SIZE]; + + int16_t direct_mv[2][X264_SCAN8_SIZE][2]; + int8_t direct_ref[2][X264_SCAN8_SIZE]; } cache; /* */ diff --git a/common/macroblock.c b/common/macroblock.c index 77982488..356fc6bf 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -349,6 +349,183 @@ void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] ) } } +static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h ) +{ + int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x; + int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x; + int i; + + x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 ); + + for( i = 0; i < 4; i++ ) + { + const int x8 = 2*(i%2); + const int y8 = 2*(i/2); + /* TODO: MapColToList0 */ + const int i_ref = h->mb.list1ref0.ref[ i_mb_8x8 + x8/2 + y8 * h->mb.i_mb_stride ]; + + if( i_ref == -1 ) + { + x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, 0 ); + x264_macroblock_cache_mv( h, x8, y8, 2, 2, 0, 0, 0 ); + x264_macroblock_cache_mv( h, x8, y8, 2, 2, 1, 0, 0 ); + } + else + { + int tb = x264_clip3( h->fdec->i_poc - h->fref0[i_ref]->i_poc, -128, 127 ); + int td = x264_clip3( h->fref1[0]->i_poc - h->fref0[i_ref]->i_poc, -128, 127 ); + int tx = td ? (16384 + (abs(td) >> 1)) / td : 0; /* td == 0 takes the unscaled branch below; never divide by it */ + int dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 ); + int x4, y4; + + x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, i_ref ); + + for( y4 = y8; y4 < y8+2; y4++ ) + for( x4 = x8; x4 < x8+2; x4++ ) + { + const int16_t *mv_col = h->mb.list1ref0.mv[ i_mb_4x4 + x4 + y4 * 4 * h->mb.i_mb_stride ]; + if( td == 0 /* || pic0 is a long-term ref */ ) + { +
x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_col[0], mv_col[1] ); + x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 ); + } + else + { + int mv_l0[2]; + mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8; + mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8; + x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_l0[0], mv_l0[1] ); + x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] ); + } + } + } + } + + return 1; +} + +static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h ) +{ + int ref[2]; + int mv[2][2]; + int i_list; + int i8, i4; + const int s8x8 = 2 * h->mb.i_mb_stride; + const int s4x4 = 4 * h->mb.i_mb_stride; + const int8_t *l1ref = &h->mb.list1ref0.ref[ 2*h->mb.i_mb_x + 2*s8x8*h->mb.i_mb_y ]; + const int16_t (*l1mv)[2] = (const int16_t (*)[2]) + &h->mb.list1ref0.mv[ 4*h->mb.i_mb_x + 4*s4x4*h->mb.i_mb_y ]; + + for( i_list=0; i_list<2; i_list++ ) + { + int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1]; + int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8]; + int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4]; + if( i_refc == -2 ) + i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1]; + + ref[i_list] = i_refa; + if( ref[i_list] < 0 || ( i_refb < ref[i_list] && i_refb >= 0 )) + ref[i_list] = i_refb; + if( ref[i_list] < 0 || ( i_refc < ref[i_list] && i_refc >= 0 )) + ref[i_list] = i_refc; + if( ref[i_list] < 0 ) + ref[i_list] = -1; + } + + if( ref[0] < 0 && ref[1] < 0 ) + { + ref[0] = + ref[1] = 0; + mv[0][0] = + mv[0][1] = + mv[1][0] = + mv[1][1] = 0; + } + else + { + for( i_list=0; i_list<2; i_list++ ) + { + if( ref[i_list] >= 0 ) + x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] ); + else + mv[i_list][0] = mv[i_list][1] = 0; + } + } + + /* FIXME: clip mv ? 
*/ + + x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] ); + x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] ); + x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, mv[0][0], mv[0][1] ); + x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, mv[1][0], mv[1][1] ); + + /* col_zero_flag */ + for( i8=0; i8<4; i8++ ) + { + const int x8 = i8%2; + const int y8 = i8/2; + if( l1ref[ x8 + y8*s8x8 ] == 0 ) + { + for( i4=0; i4<4; i4++ ) + { + const int x4 = i4%2 + 2*x8; + const int y4 = i4/2 + 2*y8; + const int16_t *mvcol = l1mv[x4 + y4*s4x4]; + if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 ) + { + if( ref[0] == 0 ) + x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0, 0 ); + if( ref[1] == 0 ) + x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 ); + } + } + } + } + + return 1; +} + +int x264_mb_predict_mv_direct16x16( x264_t *h ) +{ + int b_available; + if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE ) + return 0; + else if( h->sh.b_direct_spatial_mv_pred ) + b_available = x264_mb_predict_mv_direct16x16_spatial( h ); + else + b_available = x264_mb_predict_mv_direct16x16_temporal( h ); + + /* cache ref & mv */ + if( b_available ) + { + int i, l; + for( l = 0; l < 2; l++ ) + for( i = 0; i < 4; i++ ) + h->mb.cache.direct_ref[l][i] = h->mb.cache.ref[l][x264_scan8[i*4]]; + memcpy(h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv)); + } + + return b_available; +} + +void x264_mb_load_mv_direct8x8( x264_t *h, int idx ) +{ + const int x = 2*(idx%2); + const int y = 2*(idx/2); + int l; + x264_macroblock_cache_ref( h, x, y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] ); + x264_macroblock_cache_ref( h, x, y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] ); + for( l = 0; l < 2; l++ ) + { + memcpy( h->mb.cache.mv[l][x264_scan8[idx*4]], /* one cache row of the 8x8 block = two 4x4 mvs; memcpy avoids aliasing int16_t as uint64_t */ + h->mb.cache.direct_mv[l][x264_scan8[idx*4]], 2 * sizeof( h->mb.cache.mv[l][0] ) ); + memcpy( h->mb.cache.mv[l][x264_scan8[idx*4]+8], /* second row of the same 8x8 block */ + h->mb.cache.direct_mv[l][x264_scan8[idx*4]+8], 2 * sizeof( h->mb.cache.mv[l][0] ) ); + } +} + +/* This just improves encoder performance, it's not part of
the spec */ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc ) { int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref]; @@ -469,6 +646,49 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 ); } +static void x264_mb_mc_direct8x8( x264_t *h, int x, int y ) +{ + const int i8 = x264_scan8[0] + x + 8*y; + + /* FIXME: optimize based on current block size, not global settings? */ + if( h->sps->b_direct8x8_inference ) + { + if( h->mb.cache.ref[0][i8] >= 0 ) + if( h->mb.cache.ref[1][i8] >= 0 ) + x264_mb_mc_01xywh( h, x, y, 2, 2 ); + else + x264_mb_mc_0xywh( h, x, y, 2, 2 ); + else + x264_mb_mc_1xywh( h, x, y, 2, 2 ); + } + else + { + if( h->mb.cache.ref[0][i8] >= 0 ) + { + if( h->mb.cache.ref[1][i8] >= 0 ) + { + x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 ); + x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 ); + } + else + { + x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 ); + x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 ); + } + } + else + { + x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 ); + x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 ); + } + } +} void x264_mb_mc( x264_t *h ) { @@ -489,7 +709,7 @@ void x264_mb_mc( x264_t *h ) x264_mb_mc_0xywh( h, 2, 0, 2, 4 ); } } - else if( h->mb.i_type == P_8x8 ) + else if( h->mb.i_type == P_8x8 || h->mb.i_type == B_8x8 ) { int i; for( i = 0; i < 4; i++ ) @@ -515,13 +735,55 @@ void x264_mb_mc( x264_t *h ) x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 ); x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 ); break; + case D_L1_8x8: + x264_mb_mc_1xywh( h, x, y, 2, 2 ); + break; + case D_L1_8x4: + x264_mb_mc_1xywh( h, x, y+0, 2, 1 ); + x264_mb_mc_1xywh( h, x, y+1, 2, 1 ); + break; + case D_L1_4x8: + 
x264_mb_mc_1xywh( h, x+0, y, 1, 2 ); + x264_mb_mc_1xywh( h, x+1, y, 1, 2 ); + break; + case D_L1_4x4: + x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 ); + x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 ); + break; + case D_BI_8x8: + x264_mb_mc_01xywh( h, x, y, 2, 2 ); + break; + case D_BI_8x4: + x264_mb_mc_01xywh( h, x, y+0, 2, 1 ); + x264_mb_mc_01xywh( h, x, y+1, 2, 1 ); + break; + case D_BI_4x8: + x264_mb_mc_01xywh( h, x+0, y, 1, 2 ); + x264_mb_mc_01xywh( h, x+1, y, 1, 2 ); + break; + case D_BI_4x4: + x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 ); + x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 ); + break; + case D_DIRECT_8x8: + x264_mb_mc_direct8x8( h, x, y ); + break; } } } - else if( h->mb.i_type == B_8x8 || h->mb.i_type == B_DIRECT ) + else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT ) { - x264_log( h, X264_LOG_ERROR, "mc_luma with unsupported mb\n" ); - return; + int i; + for( i = 0; i < 4; i++ ) + { + const int x = 2*(i%2); + const int y = 2*(i/2); + x264_mb_mc_direct8x8( h, x, y ); + } } else /* B_*x* */ { @@ -568,13 +830,14 @@ void x264_mb_mc( x264_t *h ) void x264_macroblock_cache_init( x264_t *h ) { int i, j; - int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height; + int i_mb_count = h->mb.i_mb_count; h->mb.i_mb_stride = h->sps->i_mb_width; h->mb.type= x264_malloc( i_mb_count * sizeof( int8_t) ); h->mb.qp = x264_malloc( i_mb_count * sizeof( int8_t) ); h->mb.cbp = x264_malloc( i_mb_count * sizeof( int16_t) ); + h->mb.skipbp = x264_malloc( i_mb_count * sizeof( int8_t) ); /* 0 -> 3 top(4), 4 -> 6 : left(3) */ h->mb.intra4x4_pred_mode = x264_malloc( i_mb_count * 7 * sizeof( int8_t ) ); @@ -598,6 +861,14 @@ void x264_macroblock_cache_init( x264_t *h ) for( j=0; j<16; j++ ) /* FIXME: alloc no more than param.i_frame_reference */ h->mb.mvr[i][j] = x264_malloc( 2 * i_mb_count * sizeof( int16_t ) ); + h->mb.list1ref0.ref 
= NULL; + h->mb.list1ref0.mv = NULL; + if( h->param.i_bframe ) + { + h->mb.list1ref0.ref = x264_malloc( 4 * i_mb_count * sizeof( int8_t ) ); + h->mb.list1ref0.mv = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) ); + } + /* init with not avaiable (for top right idx=7,15) */ memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) ); memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) ); @@ -614,12 +885,18 @@ void x264_macroblock_cache_end( x264_t *h ) x264_free( h->mb.mvd[0] ); x264_free( h->mb.mvd[1] ); } + if( h->param.i_bframe ) + { + x264_free( h->mb.list1ref0.ref ); + x264_free( h->mb.list1ref0.mv ); + } x264_free( h->mb.mv[0] ); x264_free( h->mb.mv[1] ); x264_free( h->mb.ref[0] ); x264_free( h->mb.ref[1] ); x264_free( h->mb.intra4x4_pred_mode ); x264_free( h->mb.non_zero_count ); + x264_free( h->mb.skipbp ); x264_free( h->mb.cbp ); x264_free( h->mb.qp ); x264_free( h->mb.type ); @@ -923,6 +1200,29 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) } } } + + /* load skip */ + if( h->param.b_cabac ) + { + if( h->sh.i_type == SLICE_TYPE_B ) + { + memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) ); + if( i_left_xy >= 0 ) + { + h->mb.cache.skip[x264_scan8[0] - 1] = h->mb.skipbp[i_left_xy] & 0x2; + h->mb.cache.skip[x264_scan8[8] - 1] = h->mb.skipbp[i_left_xy] & 0x8; + } + if( i_top_xy >= 0 ) + { + h->mb.cache.skip[x264_scan8[0] - 8] = h->mb.skipbp[i_top_xy] & 0x4; + h->mb.cache.skip[x264_scan8[4] - 8] = h->mb.skipbp[i_top_xy] & 0x8; + } + } + else if( h->mb.i_mb_xy == 0 && h->sh.i_type == SLICE_TYPE_P ) + { + memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) ); + } + } } } @@ -1036,7 +1336,7 @@ void x264_macroblock_cache_save( x264_t *h ) else h->mb.chroma_pred_mode[i_mb_xy] = I_PRED_CHROMA_DC; - if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) ) + if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) ) { int i_list; for( i_list = 0; i_list < 2; i_list++ ) @@ 
-1070,6 +1370,27 @@ void x264_macroblock_cache_save( x264_t *h ) } } } + if( h->sh.i_type == SLICE_TYPE_B ) + { + if( i_mb_type == B_SKIP || i_mb_type == B_DIRECT ) + h->mb.skipbp[i_mb_xy] = 0xf; + else if( i_mb_type == B_8x8 ) + { + int skipbp = 0; + for( i = 0; i < 4; i++ ) + skipbp |= ( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) << i; + h->mb.skipbp[i_mb_xy] = skipbp; + } + else + h->mb.skipbp[i_mb_xy] = 0; + } } } +void x264_macroblock_direct_ref_save( x264_t *h ) +{ + /* Manipulation of ref numbers is unnecessary unless we allow + * ref list reordering, multiple B-frame delay, or B-frames as refs. */ + memcpy( h->mb.list1ref0.ref, h->mb.ref[0], 4 * h->mb.i_mb_count * sizeof( int8_t ) ); + memcpy( h->mb.list1ref0.mv, h->mb.mv[0], 2*16 * h->mb.i_mb_count * sizeof( int16_t ) ); +} diff --git a/common/macroblock.h b/common/macroblock.h index 1cc6f6aa..02689868 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -37,6 +37,7 @@ enum macroblock_position_e /* XXX mb_type isn't the one written in the bitstream -> only internal usage */ #define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_16x16 ) #define IS_SKIP(type) ( (type) == P_SKIP || (type) == B_SKIP ) +#define IS_DIRECT(type) ( (type) == B_DIRECT ) enum mb_class_e { I_4x4 = 0, @@ -118,6 +119,21 @@ enum mb_partition_e D_16x16 = 16, }; +static const int x264_mb_partition_listX_table[2][17] = +{{ + 1, 1, 1, 1, /* D_L0_* */ + 0, 0, 0, 0, /* D_L1_* */ + 1, 1, 1, 1, /* D_BI_* */ + 0, /* D_DIRECT_8x8 */ + 0, 0, 0, 0 /* 8x8 .. 16x16 */ +}, +{ + 0, 0, 0, 0, /* D_L0_* */ + 1, 1, 1, 1, /* D_L1_* */ + 1, 1, 1, 1, /* D_BI_* */ + 0, /* D_DIRECT_8x8 */ + 0, 0, 0, 0 /* 8x8 .. 
16x16 */ +}}; static const int x264_mb_partition_count_table[17] = { /* sub L0 */ @@ -137,6 +153,8 @@ void x264_macroblock_cache_load( x264_t *h, int, int ); void x264_macroblock_cache_save( x264_t *h ); void x264_macroblock_cache_end( x264_t *h ); +void x264_macroblock_direct_ref_save( x264_t *h ); + void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int i_qscale ); void x264_mb_dequant_2x2_dc( int16_t dct[2][2], int i_qscale ); void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale ); @@ -150,14 +168,23 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] ); * h->mb. need only valid values from other blocks */ void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] ); /* x264_mb_predict_mv: - * set mvp with predicted mv for all blocks except P_SKIP + * set mvp with predicted mv for all blocks except SKIP and DIRECT * h->mb. need valid ref/partition/sub of current block to be valid * and valid mv/ref from other blocks . */ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] ); +/* x264_mb_predict_mv_direct16x16: + * set h->mb.cache.mv and h->mb.cache.ref for B_SKIP or B_DIRECT + * h->mb. need only valid values from other blocks + * return 1 on success, 0 on failure */ +int x264_mb_predict_mv_direct16x16( x264_t *h ); +/* x264_mb_load_mv_direct8x8: + * set h->mb.cache.mv and h->mb.cache.ref for B_DIRECT + * must be called only after x264_mb_predict_mv_direct16x16 */ +void x264_mb_load_mv_direct8x8( x264_t *h, int idx ); /* x264_mb_predict_mv_ref16x16: * set mvc with D_16x16 prediction. * uses all neighbors, even those that didn't end up using this ref. - * need only valid values from other blocks */ + * h->mb. 
need only valid values from other blocks */ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc ); @@ -204,6 +231,17 @@ static inline void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width } } } +static inline void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip ) +{ + int dy, dx; + for( dy = 0; dy < height; dy++ ) + { + for( dx = 0; dx < width; dx++ ) + { + h->mb.cache.skip[X264_SCAN8_0+x+dx+8*(y+dy)] = b_skip; + } + } +} #endif diff --git a/encoder/analyse.c b/encoder/analyse.c index 7893513c..04fd6ffd 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -91,6 +91,11 @@ typedef struct x264_mb_analysis_list_t l1; int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */ + int i_cost16x16direct; + int i_cost8x8bi; + int i_cost8x8direct[4]; + + int b_direct_available; } x264_mb_analysis_t; @@ -163,12 +168,15 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) a->l1.i_cost4x4[i] = -1; a->l1.i_cost8x4[i] = -1; a->l1.i_cost4x8[i] = -1; + a->i_cost8x8direct[i] = -1; } a->l1.i_cost16x8 = -1; a->l1.i_cost8x16 = -1; a->i_cost16x16bi = -1; + a->i_cost16x16direct = -1; + a->i_cost8x8bi = -1; } } } @@ -719,6 +727,27 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8 a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost; } +static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a ) +{ + /* Assumes that fdec still contains the results of + * x264_mb_predict_mv_direct16x16 and x264_mb_mc */ + + uint8_t *p_fenc = h->mb.pic.p_fenc[0]; + uint8_t *p_fdec = h->mb.pic.p_fdec[0]; + int i_stride= h->mb.pic.i_stride[0]; + int i; + + a->i_cost16x16direct = 0; + for( i = 0; i < 4; i++ ) + { + const int x8 = i%2; + const int y8 = i/2; + const int off = 8 * x8 + 8 * i_stride * y8; + a->i_cost16x16direct += + a->i_cost8x8direct[i] = + h->pixf.satd[PIXEL_8x8]( 
&p_fenc[off], i_stride, &p_fdec[off], i_stride ); + } +} static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) { @@ -796,6 +825,121 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) ); } +static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a ) +{ + uint8_t pix[2][8*8]; + uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0], + h->mb.pic.p_fref[1][a->l1.i_ref][0] }; + uint8_t *p_fenc = h->mb.pic.p_fenc[0]; + int mvc[2][5][2], i_mvc[2]; + int i, j; + + /* XXX Needed for x264_mb_predict_mv */ + h->mb.i_partition = D_8x8; + + a->i_cost8x8bi = 0; + + i_mvc[0] = i_mvc[1] = 1; + mvc[0][0][0] = a->l0.me16x16.mv[0]; + mvc[0][0][1] = a->l0.me16x16.mv[1]; + mvc[1][0][0] = a->l1.me16x16.mv[0]; + mvc[1][0][1] = a->l1.me16x16.mv[1]; + + + for( i = 0; i < 4; i++ ) + { + const int x8 = i%2; + const int y8 = i/2; + uint8_t *p_fenc_i = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)]; + int i_part_cost; + int i_part_cost_bi = 0; + + for( j = 0; j < 2; j++ ) + { + x264_mb_analysis_list_t *l = j ? 
&a->l1 : &a->l0; + x264_me_t *m = &l->me8x8[i]; + + m->i_pixel = PIXEL_8x8; + m->lm = a->i_lambda; + + m->p_fenc = p_fenc_i; + m->p_fref = &p_fref[j][8*(y8*h->mb.pic.i_stride[0]+x8)]; + m->i_stride = h->mb.pic.i_stride[0]; + m->i_mv_range = a->i_mv_range; + + x264_mb_predict_mv( h, j, 4*i, 2, m->mvp ); + x264_me_search( h, m, mvc[j], i_mvc[j] ); + + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, j, m->mv[0], m->mv[1] ); + l->i_cost8x8 += m->cost; + + /* BI mode */ + h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[j], 8, + m->mv[0], m->mv[1], 8, 8 ); + /* FIXME: add ref cost */ + i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) + + bs_size_se( m->mv[1] - m->mvp[1] ) ); + } + + h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 ); + i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 ); + + i_part_cost = a->l0.me8x8[i].cost; + h->mb.i_sub_partition[i] = D_L0_8x8; + if( a->l1.me8x8[i].cost < i_part_cost ) + { + i_part_cost = a->l1.me8x8[i].cost; + h->mb.i_sub_partition[i] = D_L1_8x8; + } + if( i_part_cost_bi < i_part_cost ) + { + i_part_cost = i_part_cost_bi; + h->mb.i_sub_partition[i] = D_BI_8x8; + } + if( a->i_cost8x8direct[i] < i_part_cost && a->i_cost8x8direct[i] >= 0) + { + i_part_cost = a->i_cost8x8direct[i]; + h->mb.i_sub_partition[i] = D_DIRECT_8x8; + } + a->i_cost8x8bi += i_part_cost; + + /* XXX Needed for x264_mb_predict_mv */ + if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) + { + x264_mb_load_mv_direct8x8( h, i ); + x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 ); + x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 ); + x264_macroblock_cache_skip( h, 2*x8, 2*y8, 2, 2, 1 ); + } + else + { + if( h->mb.i_sub_partition[i] == D_L1_8x8 ) + { + x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, -1 ); + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 ); + x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 ); + } + else + { + x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, a->l0.i_ref ); + 
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, a->l0.me8x8[i].mv[0], a->l0.me8x8[i].mv[1] ); + } + + if( h->mb.i_sub_partition[i] == D_L0_8x8 ) + { + x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 1, -1 ); + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 ); + x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 ); + } + else + { + x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 1, a->l1.i_ref ); + x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, a->l1.me8x8[i].mv[0], a->l1.me8x8[i].mv[1] ); + } + } + } +} + /***************************************************************************** * x264_macroblock_analyse: *****************************************************************************/ @@ -1011,39 +1155,90 @@ void x264_macroblock_analyse( x264_t *h ) } else if( h->sh.i_type == SLICE_TYPE_B ) { + const unsigned int i_neighbour = h->mb.i_neighbour; + const unsigned int flags = h->param.analyse.inter; + int b_skip = 0; int i_cost; - /* best inter mode */ - x264_mb_analyse_inter_b16x16( h, &analysis ); - h->mb.i_type = B_L0_L0; - h->mb.i_partition = D_16x16; - i_cost = analysis.l0.me16x16.cost; - - if( analysis.l1.me16x16.cost < i_cost ) + analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h ); + if( analysis.b_direct_available ) { - h->mb.i_type = B_L1_L1; - i_cost = analysis.l1.me16x16.cost; - } - if( analysis.i_cost16x16bi < i_cost ) - { - h->mb.i_type = B_BI_BI; - i_cost = analysis.i_cost16x16bi; - } + h->mb.i_type = B_SKIP; + x264_mb_mc( h ); - /* best intra mode */ - x264_mb_analyse_intra( h, &analysis ); - if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost ) - { - h->mb.i_type = I_16x16; - i_cost = analysis.i_sad_i16x16; + /* Conditioning the probe on neighboring block types + * doesn't seem to help speed or quality. 
*/ + b_skip = x264_macroblock_probe_bskip( h ); } - if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost ) + + if( !b_skip ) { - h->mb.i_type = I_4x4; - i_cost = analysis.i_sad_i4x4; + /* best inter mode */ + /* direct must be first */ + if( analysis.b_direct_available ) + x264_mb_analyse_inter_direct( h, &analysis ); + + x264_mb_analyse_inter_b16x16( h, &analysis ); + + /* 8x8 must be last */ + if( flags & X264_ANALYSE_BSUB16x16 ) + x264_mb_analyse_inter_b8x8( h, &analysis ); + + h->mb.i_type = B_L0_L0; + h->mb.i_partition = D_16x16; + i_cost = analysis.l0.me16x16.cost; + if( analysis.l1.me16x16.cost < i_cost ) + { + h->mb.i_type = B_L1_L1; + i_cost = analysis.l1.me16x16.cost; + } + if( analysis.i_cost16x16bi < i_cost ) + { + h->mb.i_type = B_BI_BI; + i_cost = analysis.i_cost16x16bi; + } + if( analysis.i_cost16x16direct < i_cost && analysis.i_cost16x16direct >= 0 ) + { + h->mb.i_type = B_DIRECT; + i_cost = analysis.i_cost16x16direct; + } + if( analysis.i_cost8x8bi < i_cost && analysis.i_cost8x8bi >= 0 ) + { + h->mb.i_type = B_8x8; + h->mb.i_partition = D_8x8; + i_cost = analysis.i_cost8x8bi; + } + + /* refine qpel */ + if( h->mb.i_partition == D_16x16 ) + { + if( h->mb.i_type == B_L0_L0 ) + { + x264_me_refine_qpel( h, &analysis.l0.me16x16 ); + i_cost = analysis.l0.me16x16.cost; + } + else if( h->mb.i_type == B_L1_L1 ) + { + x264_me_refine_qpel( h, &analysis.l1.me16x16 ); + i_cost = analysis.l1.me16x16.cost; + } + } + /* TODO: refine bidir, 8x8 */ + + /* best intra mode */ + x264_mb_analyse_intra( h, &analysis ); + if( analysis.i_sad_i16x16 >= 0 && analysis.i_sad_i16x16 < i_cost ) + { + h->mb.i_type = I_16x16; + i_cost = analysis.i_sad_i16x16; + } + if( analysis.i_sad_i4x4 >=0 && analysis.i_sad_i4x4 < i_cost ) + { + h->mb.i_type = I_4x4; + i_cost = analysis.i_sad_i4x4; + } } } -#undef BEST_TYPE /*-------------------- Update MB from the analysis ----------------------*/ h->mb.type[h->mb.i_mb_xy] = h->mb.i_type; @@ -1134,6 +1329,15 @@ void 
x264_macroblock_analyse( x264_t *h ) break; } + case B_SKIP: + case B_DIRECT: + /* probably unnecessary for B_SKIP */ + x264_mb_load_mv_direct8x8( h, 0 ); + x264_mb_load_mv_direct8x8( h, 1 ); + x264_mb_load_mv_direct8x8( h, 2 ); + x264_mb_load_mv_direct8x8( h, 3 ); + break; + case B_L0_L0: switch( h->mb.i_partition ) { @@ -1183,6 +1387,9 @@ void x264_macroblock_analyse( x264_t *h ) break; } break; + case B_8x8: + /* nothing to do: caches were updated during analysis */ + break; default: fprintf( stderr, "internal error (invalid MB type)\n" ); diff --git a/encoder/cabac.c b/encoder/cabac.c index 0dc3228c..f30af96c 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -612,9 +612,9 @@ static inline void x264_cabac_mb_ref( x264_t *h, int i_list, int idx ) int i_ref = h->mb.cache.ref[i_list][i8]; int ctx = 0; - if( i_refa > 0 ) + if( i_refa > 0 && !h->mb.cache.skip[i8 - 1]) ctx++; - if( i_refb > 0 ) + if( i_refb > 0 && !h->mb.cache.skip[i8 - 8]) ctx += 2; while( i_ref > 0 ) @@ -706,6 +706,47 @@ static inline void x264_cabac_mb_mvd( x264_t *h, int i_list, int idx, int width x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mdx, mdy ); } +static inline void x264_cabac_mb8x8_mvd( x264_t *h, int i_list ) +{ + int i; + for( i = 0; i < 4; i++ ) + { + if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) + { + continue; + } + + switch( h->mb.i_sub_partition[i] ) + { + case D_L0_8x8: + case D_L1_8x8: + case D_BI_8x8: + x264_cabac_mb_mvd( h, i_list, 4*i, 2, 2 ); + break; + case D_L0_8x4: + case D_L1_8x4: + case D_BI_8x4: + x264_cabac_mb_mvd( h, i_list, 4*i+0, 2, 1 ); + x264_cabac_mb_mvd( h, i_list, 4*i+2, 2, 1 ); + break; + case D_L0_4x8: + case D_L1_4x8: + case D_BI_4x8: + x264_cabac_mb_mvd( h, i_list, 4*i+0, 1, 2 ); + x264_cabac_mb_mvd( h, i_list, 4*i+1, 1, 2 ); + break; + case D_L0_4x4: + case D_L1_4x4: + case D_BI_4x4: + x264_cabac_mb_mvd( h, i_list, 4*i+0, 1, 1 ); + x264_cabac_mb_mvd( h, i_list, 4*i+1, 1, 1 ); + 
x264_cabac_mb_mvd( h, i_list, 4*i+2, 1, 1 ); + x264_cabac_mb_mvd( h, i_list, 4*i+3, 1, 1 ); + break; + } + } +} + static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) { /* TODO: clean up/optimize */ @@ -964,6 +1005,7 @@ void x264_macroblock_write_cabac( x264_t *h, bs_t *s ) const int i_mb_pos_start = bs_pos( s ); int i_mb_pos_tex; + int i_list; int i; /* Write the MB type */ @@ -1060,40 +1102,36 @@ void x264_macroblock_write_cabac( x264_t *h, bs_t *s ) x264_cabac_mb_ref( h, 0, 12 ); } - for( i = 0; i < 4; i++ ) + x264_cabac_mb8x8_mvd( h, 0 ); + } + else if( i_mb_type == B_8x8 ) + { + /* sub mb type */ + x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[0] ); + x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[1] ); + x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[2] ); + x264_cabac_mb_sub_b_partition( h, h->mb.i_sub_partition[3] ); + + /* ref */ + for( i_list = 0; i_list < 2; i_list++ ) { - switch( h->mb.i_sub_partition[i] ) + if( ( i_list ? h->sh.i_num_ref_idx_l1_active : h->sh.i_num_ref_idx_l0_active ) == 1 ) + continue; + for( i = 0; i < 4; i++ ) { - case D_L0_8x8: - x264_cabac_mb_mvd( h, 0, 4*i, 2, 2 ); - break; - case D_L0_8x4: - x264_cabac_mb_mvd( h, 0, 4*i+0, 2, 1 ); - x264_cabac_mb_mvd( h, 0, 4*i+2, 2, 1 ); - break; - case D_L0_4x8: - x264_cabac_mb_mvd( h, 0, 4*i+0, 1, 2 ); - x264_cabac_mb_mvd( h, 0, 4*i+1, 1, 2 ); - break; - case D_L0_4x4: - x264_cabac_mb_mvd( h, 0, 4*i+0, 1, 1 ); - x264_cabac_mb_mvd( h, 0, 4*i+1, 1, 1 ); - x264_cabac_mb_mvd( h, 0, 4*i+2, 1, 1 ); - x264_cabac_mb_mvd( h, 0, 4*i+3, 1, 1 ); - break; + if( x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) /* test the list being written (was hard-coded to L0) */ + { + x264_cabac_mb_ref( h, i_list, 4*i ); + } } } - } - else if( i_mb_type == B_8x8 ) - { - /* TODO */ - fprintf( stderr, "Arggg B_8x8\n" ); - return; + + x264_cabac_mb8x8_mvd( h, 0 ); + x264_cabac_mb8x8_mvd( h, 1 ); } else if( i_mb_type != B_DIRECT ) { /* All B mode */ - int i_list; int b_list[2][2]; /* init ref list utilisations */
diff --git a/encoder/cavlc.c b/encoder/cavlc.c index 5f19be00..3df4422a 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -41,6 +41,10 @@ static const uint8_t inter_cbp_to_golomb[48]= 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19, 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12 }; +static const uint8_t sub_mb_type_p_to_golomb[4]= +{ 3, 1, 2, 0 }; +static const uint8_t sub_mb_type_b_to_golomb[13]= +{ 10, 4, 5, 1, 11, 6, 7, 2, 12, 8, 9, 3, 0 }; static const uint8_t block_idx_x[16] = { @@ -257,6 +261,72 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int *l, i } } +static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list ) +{ + int i; + for( i = 0; i < 4; i++ ) + { + int mvp[2]; + + if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) + { + continue; + } + + switch( h->mb.i_sub_partition[i] ) + { + case D_L0_8x8: + case D_L1_8x8: + case D_BI_8x8: + x264_mb_predict_mv( h, i_list, 4*i, 2, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); + break; + case D_L0_8x4: + case D_L1_8x4: + case D_BI_8x4: + x264_mb_predict_mv( h, i_list, 4*i+0, 2, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); + + x264_mb_predict_mv( h, i_list, 4*i+2, 2, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] ); + break; + case D_L0_4x8: + case D_L1_4x8: + case D_BI_4x8: + x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); + + x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] ); + bs_write_se( 
s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] ); + break; + case D_L0_4x4: + case D_L1_4x4: + case D_BI_4x4: + x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); + + x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] ); + + x264_mb_predict_mv( h, i_list, 4*i+2, 1, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] ); + + x264_mb_predict_mv( h, i_list, 4*i+3, 1, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][1] - mvp[1] ); + break; + } + } +} + /***************************************************************************** * x264_macroblock_write: *****************************************************************************/ @@ -421,21 +491,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) /* sub mb type */ for( i = 0; i < 4; i++ ) { - switch( h->mb.i_sub_partition[i] ) - { - case D_L0_8x8: - bs_write_ue( s, 0 ); - break; - case D_L0_8x4: - bs_write_ue( s, 1 ); - break; - case D_L0_4x8: - bs_write_ue( s, 2 ); - break; - case D_L0_4x4: - bs_write_ue( s, 3 ); - break; - } + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] ); } /* ref0 */ if( h->sh.i_num_ref_idx_l0_active > 1 && b_sub_ref0 ) @@ -445,59 +501,36 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[8]] ); bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[12]] ); } + + x264_sub_mb_mv_write_cavlc( h, s, 0 ); + } + else if( i_mb_type == B_8x8 ) + { + bs_write_ue( s, 22 ); + + /* 
sub mb type */ + for( i = 0; i < 4; i++ ) + { + bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] ); + } + /* ref */ for( i = 0; i < 4; i++ ) { - int mvp[2]; - - switch( h->mb.i_sub_partition[i] ) + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) { - case D_L0_8x8: - x264_mb_predict_mv( h, 0, 4*i, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] ); - break; - case D_L0_8x4: - x264_mb_predict_mv( h, 0, 4*i+0, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] ); - - x264_mb_predict_mv( h, 0, 4*i+2, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][1] - mvp[1] ); - break; - case D_L0_4x8: - x264_mb_predict_mv( h, 0, 4*i+0, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] ); - - x264_mb_predict_mv( h, 0, 4*i+1, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][1] - mvp[1] ); - break; - case D_L0_4x4: - x264_mb_predict_mv( h, 0, 4*i+0, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i]][1] - mvp[1] ); - - x264_mb_predict_mv( h, 0, 4*i+1, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+1]][1] - mvp[1] ); - - x264_mb_predict_mv( h, 0, 4*i+2, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+2]][1] - mvp[1] ); - - x264_mb_predict_mv( h, 0, 4*i+3, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[0][x264_scan8[4*i+3]][0] - mvp[0] ); - bs_write_se( s, 
h->mb.cache.mv[0][x264_scan8[4*i+3]][1] - mvp[1] ); - break; + bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[i*4]] ); } } - } - else if( i_mb_type == B_8x8 ) - { - fprintf( stderr, "invalid/unhandled mb_type (B_8x8)\n" ); - return; + for( i = 0; i < 4; i++ ) + { + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) + { + bs_write_te( s, h->sh.i_num_ref_idx_l1_active - 1, h->mb.cache.ref[1][x264_scan8[i*4]] ); + } + } + /* mvd */ + x264_sub_mb_mv_write_cavlc( h, s, 0 ); + x264_sub_mb_mv_write_cavlc( h, s, 1 ); } else if( i_mb_type != B_DIRECT ) { diff --git a/encoder/encoder.c b/encoder/encoder.c index 891ef73a..773c9c80 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -140,7 +140,7 @@ static void x264_slice_header_init( x264_slice_header_t *sh, x264_param_t *param sh->i_redundant_pic_cnt = 0; - sh->b_direct_spatial_mv_pred = 1; + sh->b_direct_spatial_mv_pred = ( param->analyse.i_direct_mv_pred == X264_DIRECT_PRED_SPATIAL ); sh->b_num_ref_idx_override = 0; sh->i_num_ref_idx_l0_active = 1; @@ -407,6 +407,8 @@ x264_t *x264_encoder_open ( x264_param_t *param ) h->pps = &h->pps_array[0]; x264_pps_init( h->pps, 0, &h->param, h->sps); + + h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height; /* Init frames. 
*/ for( i = 0; i < X264_BFRAME_MAX + 1; i++ ) @@ -630,6 +632,12 @@ static inline void x264_reference_update( x264_t *h ) { int i; + /* save mvs for B-frame prediction */ + if( h->param.i_bframe ) + { + x264_macroblock_direct_ref_save( h ); + } + /* apply deblocking filter to the current decoded picture */ if( h->param.b_deblocking_filter ) { @@ -1166,7 +1174,7 @@ do_encode: h->i_frame_num--; /* Do IDR if needed and if we can (won't work with B frames) */ - if( h->frames.next[0] == NULL && + if( h->frames.current[0] == NULL && h->frames.i_last_idr + 1 >= h->param.i_idrframe ) { /* Reset */ diff --git a/encoder/macroblock.c b/encoder/macroblock.c index 97334068..514b83a8 100644 --- a/encoder/macroblock.c +++ b/encoder/macroblock.c @@ -491,6 +491,21 @@ static void x264_mb_encode_8x8( x264_t *h, int b_inter, int i_qscale ) } } +static void x264_macroblock_encode_skip( x264_t *h ) +{ + int i; + h->mb.i_cbp_luma = 0x00; + h->mb.i_cbp_chroma = 0x00; + + for( i = 0; i < 16+8; i++ ) + { + h->mb.cache.non_zero_count[x264_scan8[i]] = 0; + } + + /* store cbp */ + h->mb.cbp[h->mb.i_mb_xy] = 0; +} + /***************************************************************************** * x264_macroblock_encode_pskip: * Encode an already marked skip block @@ -499,7 +514,6 @@ void x264_macroblock_encode_pskip( x264_t *h ) { const int mvx = h->mb.cache.mv[0][x264_scan8[0]][0]; const int mvy = h->mb.cache.mv[0][x264_scan8[0]][1]; - int i; /* Motion compensation XXX probably unneeded */ h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0], @@ -515,16 +529,7 @@ void x264_macroblock_encode_pskip( x264_t *h ) h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2], mvx, mvy, 8, 8 ); - h->mb.i_cbp_luma = 0x00; - h->mb.i_cbp_chroma = 0x00; - - for( i = 0; i < 16+8; i++ ) - { - h->mb.cache.non_zero_count[x264_scan8[i]] = 0; - } - - /* store cbp */ - h->mb.cbp[h->mb.i_mb_xy] = 0; + x264_macroblock_encode_skip( h ); } /***************************************************************************** @@ 
-542,6 +547,13 @@ void x264_macroblock_encode( x264_t *h ) x264_macroblock_encode_pskip( h ); return; } + if( h->mb.i_type == B_SKIP ) + { + /* XXX motion compensation is probably unneeded */ + x264_mb_mc( h ); + x264_macroblock_encode_skip( h ); + return; + } /* quantification scale */ i_qscale = h->mb.qp[h->mb.i_mb_xy]; @@ -750,14 +762,22 @@ void x264_macroblock_encode( x264_t *h ) } } } + + /* Check for B_SKIP */ + if( h->mb.i_type == B_DIRECT && + h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 ) + { + h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = B_SKIP; + h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */ + } } /***************************************************************************** - * x264_macroblock_probe_pskip: - * Check if the current MB could be encoded as a P_SKIP (it supposes you use + * x264_macroblock_probe_skip: + * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use * the previous QP *****************************************************************************/ -int x264_macroblock_probe_pskip( x264_t *h ) +int x264_macroblock_probe_skip( x264_t *h, int b_bidir ) { DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 ); DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 ); @@ -771,30 +791,33 @@ int x264_macroblock_probe_pskip( x264_t *h ) int i8x8, i4x4; int i_decimate_mb; - /* quantification scale */ + /* quantization scale */ i_qp = h->mb.qp[h->mb.i_mb_xy]; - /* Get the MV */ - x264_mb_predict_mv_pskip( h, mvp ); + if( !b_bidir ) + { + /* Get the MV */ + x264_mb_predict_mv_pskip( h, mvp ); - /* Special case, need to clip the vector */ - n = 16 * h->mb.i_mb_x + mvp[0]; - if( n < -24 ) - mvp[0] = -24 - 16*h->mb.i_mb_x; - else if( n > 16 * h->sps->i_mb_width + 24 ) - mvp[0] = 16 * ( h->sps->i_mb_width - h->mb.i_mb_x ) + 24; + /* Special case, need to clip the vector */ + n = 16 * h->mb.i_mb_x + mvp[0]; + if( n < -24 ) + mvp[0] = -24 - 16*h->mb.i_mb_x; + else if( n > 16 * h->sps->i_mb_width + 24 ) + mvp[0] = 16 * ( 
h->sps->i_mb_width - h->mb.i_mb_x ) + 24; - n = 16 * h->mb.i_mb_y + mvp[1]; - if( n < -24 ) - mvp[1] = -24 - 16*h->mb.i_mb_y; - else if( n > 16 * h->sps->i_mb_height + 8 ) - mvp[1] = 16 * ( h->sps->i_mb_height - h->mb.i_mb_y ) + 8; + n = 16 * h->mb.i_mb_y + mvp[1]; + if( n < -24 ) + mvp[1] = -24 - 16*h->mb.i_mb_y; + else if( n > 16 * h->sps->i_mb_height + 8 ) + mvp[1] = 16 * ( h->sps->i_mb_height - h->mb.i_mb_y ) + 8; - /* Motion compensation */ - h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0], - h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], - mvp[0], mvp[1], 16, 16 ); + /* Motion compensation */ + h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0], + h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], + mvp[0], mvp[1], 16, 16 ); + } /* get luma diff */ h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], @@ -829,9 +852,12 @@ int x264_macroblock_probe_pskip( x264_t *h ) uint8_t *p_src = h->mb.pic.p_fenc[1+ch]; uint8_t *p_dst = h->mb.pic.p_fdec[1+ch]; - h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride, - h->mb.pic.p_fdec[1+ch], i_stride, - mvp[0], mvp[1], 8, 8 ); + if( !b_bidir ) + { + h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride, + h->mb.pic.p_fdec[1+ch], i_stride, + mvp[0], mvp[1], 8, 8 ); + } h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride ); diff --git a/encoder/macroblock.h b/encoder/macroblock.h index 0b297cc9..4310c2e5 100644 --- a/encoder/macroblock.h +++ b/encoder/macroblock.h @@ -26,7 +26,12 @@ #include "../common/macroblock.h" -int x264_macroblock_probe_pskip( x264_t *h ); +int x264_macroblock_probe_skip( x264_t *h, int b_bidir ); + +static inline int x264_macroblock_probe_pskip( x264_t *h ) + { return x264_macroblock_probe_skip( h, 0 ); } +static inline int x264_macroblock_probe_bskip( x264_t *h ) + { return x264_macroblock_probe_skip( h, 1 ); } void x264_macroblock_encode ( x264_t *h ); void x264_macroblock_write_cabac ( x264_t *h, bs_t *s ); diff --git a/encoder/ratecontrol.c 
b/encoder/ratecontrol.c index bb944ddb..c33525aa 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -171,7 +171,7 @@ int x264_ratecontrol_new( x264_t *h ) rc->gop_size = h->param.i_iframe; rc->bitrate = h->param.rc.i_bitrate * 1000; - rc->nmb = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16); + rc->nmb = h->mb.i_mb_count; rc->qp = h->param.rc.i_qp_constant; rc->qpa = rc->qp; diff --git a/encoder/set.c b/encoder/set.c index 2f43f4ce..0ad5a21b 100644 --- a/encoder/set.c +++ b/encoder/set.c @@ -84,7 +84,8 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param ) sps->b_frame_mbs_only = 1; sps->b_mb_adaptive_frame_field = 0; sps->b_direct8x8_inference = 0; - if( sps->b_frame_mbs_only == 0 ) + if( sps->b_frame_mbs_only == 0 || + !(param->analyse.inter & X264_ANALYSE_PSUB8x8) ) { sps->b_direct8x8_inference = 1; } diff --git a/x264.h b/x264.h index b276d66f..8c662921 100644 --- a/x264.h +++ b/x264.h @@ -26,7 +26,7 @@ #include -#define X264_BUILD 0x000c +#define X264_BUILD 0x000d /* x264_t: * opaque handler for decoder and encoder */ @@ -50,6 +50,10 @@ typedef struct x264_t x264_t; #define X264_ANALYSE_I4x4 0x0001 /* Analyse i4x4 */ #define X264_ANALYSE_PSUB16x16 0x0010 /* Analyse p16x8, p8x16 and p8x8 */ #define X264_ANALYSE_PSUB8x8 0x0020 /* Analyse p8x4, p4x8, p4x4 */ +#define X264_ANALYSE_BSUB16x16 0x0100 /* Analyse b16x8, b8x16 and b8x8 */ +#define X264_DIRECT_PRED_NONE 0 +#define X264_DIRECT_PRED_TEMPORAL 1 +#define X264_DIRECT_PRED_SPATIAL 2 /* Colorspace type */ @@ -127,6 +131,8 @@ typedef struct unsigned int intra; /* intra flags */ unsigned int inter; /* inter flags */ + int i_direct_mv_pred; /* spatial vs temporal mv prediction */ + int i_subpel_refine; /* subpixel motion estimation quality */ int b_psnr; /* Do we compute PSNR stats (save a few % of cpu) */