From 3b66f690bd8a7d1417cedf98aec0df2702338bb2 Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Mon, 20 Mar 2006 23:00:52 +0000 Subject: [PATCH] RD subpel motion estimation (--subme 7) git-svn-id: svn://svn.videolan.org/x264/trunk@476 df754926-b1dd-0310-bc7b-ec298dee348c --- common/macroblock.c | 125 ++++++++++++------------- common/macroblock.h | 1 + encoder/analyse.c | 170 +++++++++++++++++++++++++++++++++- encoder/cabac.c | 162 +++++++++++++++++++++++++-------- encoder/cavlc.c | 211 +++++++++++++++++++++++++++++-------------- encoder/encoder.c | 3 +- encoder/macroblock.c | 85 +++++++++++++++++ encoder/macroblock.h | 2 + encoder/me.c | 96 ++++++++++++++++++++ encoder/me.h | 2 + encoder/rdo.c | 86 ++++++++++++++++++ x264.c | 2 +- 12 files changed, 772 insertions(+), 173 deletions(-) diff --git a/common/macroblock.c b/common/macroblock.c index 1f14c6b6..1507cf57 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -687,6 +687,69 @@ static void x264_mb_mc_direct8x8( x264_t *h, int x, int y ) } } +void x264_mb_mc_8x8( x264_t *h, int i8 ) +{ + const int x = 2*(i8&1); + const int y = 2*(i8>>1); + switch( h->mb.i_sub_partition[i8] ) + { + case D_L0_8x8: + x264_mb_mc_0xywh( h, x, y, 2, 2 ); + break; + case D_L0_8x4: + x264_mb_mc_0xywh( h, x, y+0, 2, 1 ); + x264_mb_mc_0xywh( h, x, y+1, 2, 1 ); + break; + case D_L0_4x8: + x264_mb_mc_0xywh( h, x+0, y, 1, 2 ); + x264_mb_mc_0xywh( h, x+1, y, 1, 2 ); + break; + case D_L0_4x4: + x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 ); + x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 ); + break; + case D_L1_8x8: + x264_mb_mc_1xywh( h, x, y, 2, 2 ); + break; + case D_L1_8x4: + x264_mb_mc_1xywh( h, x, y+0, 2, 1 ); + x264_mb_mc_1xywh( h, x, y+1, 2, 1 ); + break; + case D_L1_4x8: + x264_mb_mc_1xywh( h, x+0, y, 1, 2 ); + x264_mb_mc_1xywh( h, x+1, y, 1, 2 ); + break; + case D_L1_4x4: + x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 ); + 
x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 ); + break; + case D_BI_8x8: + x264_mb_mc_01xywh( h, x, y, 2, 2 ); + break; + case D_BI_8x4: + x264_mb_mc_01xywh( h, x, y+0, 2, 1 ); + x264_mb_mc_01xywh( h, x, y+1, 2, 1 ); + break; + case D_BI_4x8: + x264_mb_mc_01xywh( h, x+0, y, 1, 2 ); + x264_mb_mc_01xywh( h, x+1, y, 1, 2 ); + break; + case D_BI_4x4: + x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 ); + x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 ); + x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 ); + x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 ); + break; + case D_DIRECT_8x8: + x264_mb_mc_direct8x8( h, x, y ); + break; + } +} + void x264_mb_mc( x264_t *h ) { if( h->mb.i_type == P_L0 ) @@ -710,67 +773,7 @@ void x264_mb_mc( x264_t *h ) { int i; for( i = 0; i < 4; i++ ) - { - const int x = 2*(i%2); - const int y = 2*(i/2); - switch( h->mb.i_sub_partition[i] ) - { - case D_L0_8x8: - x264_mb_mc_0xywh( h, x, y, 2, 2 ); - break; - case D_L0_8x4: - x264_mb_mc_0xywh( h, x, y+0, 2, 1 ); - x264_mb_mc_0xywh( h, x, y+1, 2, 1 ); - break; - case D_L0_4x8: - x264_mb_mc_0xywh( h, x+0, y, 1, 2 ); - x264_mb_mc_0xywh( h, x+1, y, 1, 2 ); - break; - case D_L0_4x4: - x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 ); - break; - case D_L1_8x8: - x264_mb_mc_1xywh( h, x, y, 2, 2 ); - break; - case D_L1_8x4: - x264_mb_mc_1xywh( h, x, y+0, 2, 1 ); - x264_mb_mc_1xywh( h, x, y+1, 2, 1 ); - break; - case D_L1_4x8: - x264_mb_mc_1xywh( h, x+0, y, 1, 2 ); - x264_mb_mc_1xywh( h, x+1, y, 1, 2 ); - break; - case D_L1_4x4: - x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 ); - break; - case D_BI_8x8: - x264_mb_mc_01xywh( h, x, y, 2, 2 ); - break; - case D_BI_8x4: - x264_mb_mc_01xywh( h, x, y+0, 2, 1 ); - x264_mb_mc_01xywh( h, x, y+1, 2, 1 ); - break; - case D_BI_4x8: - x264_mb_mc_01xywh( 
h, x+0, y, 1, 2 ); - x264_mb_mc_01xywh( h, x+1, y, 1, 2 ); - break; - case D_BI_4x4: - x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 ); - break; - case D_DIRECT_8x8: - x264_mb_mc_direct8x8( h, x, y ); - break; - } - } + x264_mb_mc_8x8( h, i ); } else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT ) { diff --git a/common/macroblock.h b/common/macroblock.h index 6ca6492b..6a54dc7d 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -263,6 +263,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ); void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale ); void x264_mb_mc( x264_t *h ); +void x264_mb_mc_8x8( x264_t *h, int i8 ); static inline void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, int ref ) diff --git a/encoder/analyse.c b/encoder/analyse.c index e1a00d3b..af35e26a 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -660,6 +660,136 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_cost_ } } +static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a ) +{ + uint8_t *p_src = h->mb.pic.p_fenc[0]; + uint8_t *p_dst = h->mb.pic.p_fdec[0]; + + int i, idx, x, y; + int i_max, i_sad, i_best, i_mode; + int i_pred_mode; + int predict_mode[9]; + + if( h->mb.i_type == I_16x16 ) + { + int old_pred_mode = a->i_predict16x16; + i_best = a->i_sad_i16x16; + predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max ); + for( i = 0; i < i_max; i++ ) + { + if( predict_mode[i] == old_pred_mode ) + continue; + h->mb.i_intra16x16_pred_mode = predict_mode[i]; + i_sad = x264_rd_cost_mb( h, a->i_lambda2 ); + if( i_best > i_sad ) + { + a->i_predict16x16 = predict_mode[i]; + i_best = i_sad; + } + } + } + else if( h->mb.i_type == I_4x4 ) + { + for( idx = 0; idx < 16; idx++ ) + { + uint32_t pels[4]; + int i_nnz = 0; + uint8_t *p_src_by; + uint8_t 
*p_dst_by; + i_best = COST_MAX; + + i_pred_mode= x264_mb_predict_intra4x4_mode( h, idx ); + x = block_idx_x[idx]; + y = block_idx_y[idx]; + + p_src_by = p_src + 4*x + 4*y*FENC_STRIDE; + p_dst_by = p_dst + 4*x + 4*y*FDEC_STRIDE; + predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max ); + + if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP ) + /* emulate missing topright samples */ + *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U; + + for( i = 0; i < i_max; i++ ) + { + i_mode = predict_mode[i]; + h->predict_4x4[i_mode]( p_dst_by ); + + i_sad = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode ); + + if( i_best > i_sad ) + { + a->i_predict4x4[x][y] = i_mode; + i_best = i_sad; + pels[0] = *(uint32_t*)(p_dst_by+0*FDEC_STRIDE); + pels[1] = *(uint32_t*)(p_dst_by+1*FDEC_STRIDE); + pels[2] = *(uint32_t*)(p_dst_by+2*FDEC_STRIDE); + pels[3] = *(uint32_t*)(p_dst_by+3*FDEC_STRIDE); + i_nnz = h->mb.cache.non_zero_count[x264_scan8[idx]]; + } + } + + *(uint32_t*)(p_dst_by+0*FDEC_STRIDE) = pels[0]; + *(uint32_t*)(p_dst_by+1*FDEC_STRIDE) = pels[1]; + *(uint32_t*)(p_dst_by+2*FDEC_STRIDE) = pels[2]; + *(uint32_t*)(p_dst_by+3*FDEC_STRIDE) = pels[3]; + h->mb.cache.non_zero_count[x264_scan8[idx]] = i_nnz; + + h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y]; + } + } + else if( h->mb.i_type == I_8x8 ) + { + for( idx = 0; idx < 4; idx++ ) + { + uint64_t pels_h = 0; + uint8_t pels_v[7]; + int i_nnz[3]; + uint8_t *p_src_by; + uint8_t *p_dst_by; + int j; + i_best = COST_MAX; + + i_pred_mode= x264_mb_predict_intra4x4_mode( h, 4*idx ); + x = idx&1; + y = idx>>1; + + p_src_by = p_src + 8*x + 8*y*FENC_STRIDE; + p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE; + predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max ); + for( i = 0; i < i_max; i++ ) + { + i_mode = predict_mode[i]; + h->predict_8x8[i_mode]( p_dst_by, h->mb.i_neighbour8[idx] ); + + i_sad = x264_rd_cost_i8x8( h, a->i_lambda2, idx, 
i_mode ); + + if( i_best > i_sad ) + { + a->i_predict8x8[x][y] = i_mode; + i_best = i_sad; + + pels_h = *(uint64_t*)(p_dst_by+7*FDEC_STRIDE); + if( !(idx&1) ) + for( j=0; j<7; j++ ) + pels_v[j] = p_dst_by[7+j*FDEC_STRIDE]; + for( j=0; j<3; j++ ) + i_nnz[j] = h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]]; + } + } + + *(uint64_t*)(p_dst_by+7*FDEC_STRIDE) = pels_h; + if( !(idx&1) ) + for( j=0; j<7; j++ ) + p_dst_by[7+j*FDEC_STRIDE] = pels_v[j]; + for( j=0; j<3; j++ ) + h->mb.cache.non_zero_count[x264_scan8[4*idx+j+1]] = i_nnz[j]; + + x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] ); + } + } +} + #define LOAD_FENC( m, src, xoff, yoff) \ (m)->i_stride[0] = h->mb.pic.i_stride[0]; \ (m)->i_stride[1] = h->mb.pic.i_stride[1]; \ @@ -1805,6 +1935,9 @@ void x264_macroblock_analyse( x264_t *h ) } if( analysis.i_sad_i8x8 < i_cost ) h->mb.i_type = I_8x8; + + if( h->mb.i_subpel_refine >= 7 ) + x264_intra_rd_refine( h, &analysis ); } else if( h->sh.i_type == SLICE_TYPE_P ) { @@ -1859,8 +1992,6 @@ void x264_macroblock_analyse( x264_t *h ) if( ( flags & X264_ANALYSE_PSUB16x16 ) && analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost ) { - int i; - i_type = P_8x8; i_partition = D_8x8; h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] = @@ -2041,6 +2172,41 @@ void x264_macroblock_analyse( x264_t *h ) h->mb.i_type = i_type; h->stat.frame.i_intra_cost += i_intra_cost; h->stat.frame.i_inter_cost += i_cost; + + if( h->mb.i_subpel_refine >= 7 ) + { + if( IS_INTRA( h->mb.i_type ) ) + { + x264_intra_rd_refine( h, &analysis ); + } + else if( i_partition == D_16x16 ) + { + x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref ); + x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0 ); + } + else if( i_partition == D_16x8 ) + { + x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref ); + x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref ); + x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], 
analysis.i_lambda2, 0 ); + x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 2 ); + } + else if( i_partition == D_8x16 ) + { + x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref ); + x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref ); + x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0 ); + x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 1 ); + } + else if( i_partition == D_8x8 ) + { + int i8x8; + x264_analyse_update_cache( h, &analysis ); + for( i8x8 = 0; i8x8 < 4; i8x8++ ) + if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 ) + x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8 ); + } + } } } else if( h->sh.i_type == SLICE_TYPE_B ) diff --git a/encoder/cabac.c b/encoder/cabac.c index a9caa73b..0f19548f 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -500,7 +500,7 @@ static inline void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, i -static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd ) +static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd ) { const int amvd = abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 1][l] ) + abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 8][l] ); @@ -556,44 +556,38 @@ static inline void x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, i x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mdx, mdy ); } -static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i_list ) +static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int i ) { - int i; - for( i = 0; i < 4; i++ ) - { - if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) - { - continue; - } + if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) + return; - switch( h->mb.i_sub_partition[i] ) - { - 
case D_L0_8x8: - case D_L1_8x8: - case D_BI_8x8: - x264_cabac_mb_mvd( h, cb, i_list, 4*i, 2, 2 ); - break; - case D_L0_8x4: - case D_L1_8x4: - case D_BI_8x4: - x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 2, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 2, 1 ); - break; - case D_L0_4x8: - case D_L1_4x8: - case D_BI_4x8: - x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 2 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 2 ); - break; - case D_L0_4x4: - case D_L1_4x4: - case D_BI_4x4: - x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 1, 1 ); - x264_cabac_mb_mvd( h, cb, i_list, 4*i+3, 1, 1 ); - break; - } + switch( h->mb.i_sub_partition[i] ) + { + case D_L0_8x8: + case D_L1_8x8: + case D_BI_8x8: + x264_cabac_mb_mvd( h, cb, i_list, 4*i, 2, 2 ); + break; + case D_L0_8x4: + case D_L1_8x4: + case D_BI_8x4: + x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 2, 1 ); + x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 2, 1 ); + break; + case D_L0_4x8: + case D_L1_4x8: + case D_BI_4x8: + x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 2 ); + x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 2 ); + break; + case D_L0_4x4: + case D_L1_4x4: + case D_BI_4x4: + x264_cabac_mb_mvd( h, cb, i_list, 4*i+0, 1, 1 ); + x264_cabac_mb_mvd( h, cb, i_list, 4*i+1, 1, 1 ); + x264_cabac_mb_mvd( h, cb, i_list, 4*i+2, 1, 1 ); + x264_cabac_mb_mvd( h, cb, i_list, 4*i+3, 1, 1 ); + break; } } @@ -912,7 +906,8 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) x264_cabac_mb_ref( h, cb, 0, 12 ); } - x264_cabac_mb8x8_mvd( h, cb, 0 ); + for( i = 0; i < 4; i++ ) + x264_cabac_mb8x8_mvd( h, cb, 0, i ); } else if( i_mb_type == B_8x8 ) { @@ -932,8 +927,10 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) x264_cabac_mb_ref( h, cb, i_list, 4*i ); } - x264_cabac_mb8x8_mvd( h, cb, 0 ); - x264_cabac_mb8x8_mvd( h, cb, 1 ); + for( i = 0; i < 4; i++ ) + x264_cabac_mb8x8_mvd( h, cb, 0, i ); + for( i = 0; i < 4; i++ ) + 
x264_cabac_mb8x8_mvd( h, cb, 1, i ); } else if( i_mb_type != B_DIRECT ) { @@ -1052,3 +1049,88 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) #endif } +#ifdef RDO_SKIP_BS +/***************************************************************************** + * RD only; doesn't generate a valid bitstream + * doesn't write cbp or chroma dc (I don't know how much this matters) + * works on all partition sizes except 16x16 + * for sub8x8, call once per 8x8 block + *****************************************************************************/ +void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel ) +{ + const int i_mb_type = h->mb.i_type; + int j; + + if( i_mb_type == P_8x8 ) + { + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] ); + if( h->sh.i_num_ref_idx_l0_active > 1 ) + x264_cabac_mb_ref( h, cb, 0, 4*i8 ); + x264_cabac_mb8x8_mvd( h, cb, 0, i8 ); + } + else if( i_mb_type == P_L0 ) + { + if( h->sh.i_num_ref_idx_l0_active > 1 ) + x264_cabac_mb_ref( h, cb, 0, 4*i8 ); + if( h->mb.i_partition == D_16x8 ) + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4, 2 ); + else //8x16 + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 4 ); + } + else if( i_mb_type == B_8x8 ) + { + x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i8] ); + + if( h->sh.i_num_ref_idx_l0_active > 1 + && x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] ) + x264_cabac_mb_ref( h, cb, 0, 4*i8 ); + if( h->sh.i_num_ref_idx_l1_active > 1 + && x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) + x264_cabac_mb_ref( h, cb, 1, 4*i8 ); + + x264_cabac_mb8x8_mvd( h, cb, 0, i8 ); + x264_cabac_mb8x8_mvd( h, cb, 1, i8 ); + } + else + { + x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" ); + return; + } + + for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- ) + { + if( h->mb.i_cbp_luma & (1 << i8) ) + { + if( h->mb.b_transform_8x8 ) + block_residual_write_cabac( h, cb, DCT_LUMA_8x8, i8, h->dct.luma8x8[i8], 64 ); + else + { + int i4; + for( i4 = 0; i4 < 4; 
i4++ ) + block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 ); + } + } + + block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i8, h->dct.block[16+i8 ].residual_ac, 15 ); + block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i8+4, h->dct.block[16+i8+4].residual_ac, 15 ); + + i8 += x264_pixel_size[i_pixel].h >> 3; + } +} + +static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode ) +{ + const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 ); + i_mode = x264_mb_pred_mode4x4_fix( i_mode ); + x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode ); + block_residual_write_cabac( h, cb, DCT_LUMA_8x8, 4*i8, h->dct.luma8x8[i8], 64 ); +} + +static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode ) +{ + const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 ); + i_mode = x264_mb_pred_mode4x4_fix( i_mode ); + x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode ); + block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.block[i4].luma4x4, 16 ); +} +#endif diff --git a/encoder/cavlc.c b/encoder/cavlc.c index fc662907..27a98e92 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -267,79 +267,56 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s ) bs_write_se( s, i_dqp ); } -static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list ) +static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width ) { - int i; - for( i = 0; i < 4; i++ ) - { - int mvp[2]; + int mvp[2]; + x264_mb_predict_mv( h, i_list, idx, width, mvp ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] ); + bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] ); +} - if( !x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) - { - continue; - } - - switch( h->mb.i_sub_partition[i] ) - { - case D_L0_8x8: - case D_L1_8x8: - case D_BI_8x8: - x264_mb_predict_mv( h, i_list, 4*i, 2, mvp ); - bs_write_se( s, 
h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); - break; - case D_L0_8x4: - case D_L1_8x4: - case D_BI_8x4: - x264_mb_predict_mv( h, i_list, 4*i+0, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); - - x264_mb_predict_mv( h, i_list, 4*i+2, 2, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] ); - break; - case D_L0_4x8: - case D_L1_4x8: - case D_BI_4x8: - x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); - - x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] ); - break; - case D_L0_4x4: - case D_L1_4x4: - case D_BI_4x4: - x264_mb_predict_mv( h, i_list, 4*i+0, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i]][1] - mvp[1] ); - - x264_mb_predict_mv( h, i_list, 4*i+1, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+1]][1] - mvp[1] ); - - x264_mb_predict_mv( h, i_list, 4*i+2, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+2]][1] - mvp[1] ); - - x264_mb_predict_mv( h, i_list, 4*i+3, 1, mvp ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][0] - mvp[0] ); - bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[4*i+3]][1] - mvp[1] ); - break; - } +static void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i_list, int i ) +{ + if( 
!x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] ) + return; + + switch( h->mb.i_sub_partition[i] ) + { + case D_L0_8x8: + case D_L1_8x8: + case D_BI_8x8: + cavlc_mb_mvd( h, s, i_list, 4*i, 2 ); + break; + case D_L0_8x4: + case D_L1_8x4: + case D_BI_8x4: + cavlc_mb_mvd( h, s, i_list, 4*i+0, 2 ); + cavlc_mb_mvd( h, s, i_list, 4*i+2, 2 ); + break; + case D_L0_4x8: + case D_L1_4x8: + case D_BI_4x8: + cavlc_mb_mvd( h, s, i_list, 4*i+0, 1 ); + cavlc_mb_mvd( h, s, i_list, 4*i+1, 1 ); + break; + case D_L0_4x4: + case D_L1_4x4: + case D_BI_4x4: + cavlc_mb_mvd( h, s, i_list, 4*i+0, 1 ); + cavlc_mb_mvd( h, s, i_list, 4*i+1, 1 ); + cavlc_mb_mvd( h, s, i_list, 4*i+2, 1 ); + cavlc_mb_mvd( h, s, i_list, 4*i+3, 1 ); + break; } } -static void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s ) +static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end ) { int i8, i4, i; if( h->mb.b_transform_8x8 ) { /* shuffle 8x8 dct coeffs into 4x4 lists */ - for( i8 = 0; i8 < 4; i8++ ) + for( i8 = i8start; i8 <= i8end; i8++ ) if( h->mb.i_cbp_luma & (1 << i8) ) for( i4 = 0; i4 < 4; i4++ ) { @@ -350,7 +327,7 @@ static void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s ) } } - for( i8 = 0; i8 < 4; i8++ ) + for( i8 = i8start; i8 <= i8end; i8++ ) if( h->mb.i_cbp_luma & (1 << i8) ) for( i4 = 0; i4 < 4; i4++ ) block_residual_write_cavlc( h, s, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 ); @@ -541,7 +518,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[12]] ); } - x264_sub_mb_mv_write_cavlc( h, s, 0 ); + for( i = 0; i < 4; i++ ) + cavlc_mb8x8_mvd( h, s, 0, i ); } else if( i_mb_type == B_8x8 ) { @@ -568,8 +546,10 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) } } /* mvd */ - x264_sub_mb_mv_write_cavlc( h, s, 0 ); - x264_sub_mb_mv_write_cavlc( h, s, 1 ); + for( i = 0; i < 4; i++ ) + cavlc_mb8x8_mvd( h, s, 0, i ); + for( i = 0; i < 4; i++ 
) + cavlc_mb8x8_mvd( h, s, 1, i ); } else if( i_mb_type != B_DIRECT ) { @@ -702,7 +682,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 ) { cavlc_qp_delta( h, s ); - x264_macroblock_luma_write_cavlc( h, s ); + x264_macroblock_luma_write_cavlc( h, s, 0, 3 ); } if( h->mb.i_cbp_chroma != 0 ) { @@ -721,3 +701,98 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) h->stat.frame.i_ptex_bits += bs_pos(s) - i_mb_pos_tex; #endif } + +#ifdef RDO_SKIP_BS +/***************************************************************************** + * RD only; doesn't generate a valid bitstream + * doesn't write cbp or chroma dc (I don't know how much this matters) + * works on all partition sizes except 16x16 + * for sub8x8, call once per 8x8 block + *****************************************************************************/ +int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel ) +{ + bs_t s; + const int i_mb_type = h->mb.i_type; + int j; + + s.i_bits_encoded = 0; + + if( i_mb_type == P_8x8 ) + { + bs_write_ue( &s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] ); + if( h->sh.i_num_ref_idx_l0_active > 1 ) + bs_write_te( &s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[4*i8]] ); + cavlc_mb8x8_mvd( h, &s, 0, i8 ); + } + else if( i_mb_type == P_L0 ) + { + if( h->sh.i_num_ref_idx_l0_active > 1 ) + bs_write_te( &s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[4*i8]] ); + if( h->mb.i_partition == D_16x8 ) + cavlc_mb_mvd( h, &s, 0, 4*i8, 4 ); + else //8x16 + cavlc_mb_mvd( h, &s, 0, 4*i8, 2 ); + } + else if( i_mb_type == B_8x8 ) + { + bs_write_ue( &s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i8] ] ); + + if( h->sh.i_num_ref_idx_l0_active > 1 + && x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] ) + bs_write_te( &s, h->sh.i_num_ref_idx_l0_active - 1, h->mb.cache.ref[0][x264_scan8[4*i8]] ); + if( h->sh.i_num_ref_idx_l1_active > 1 + && 
x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) + bs_write_te( &s, h->sh.i_num_ref_idx_l1_active - 1, h->mb.cache.ref[1][x264_scan8[4*i8]] ); + + cavlc_mb8x8_mvd( h, &s, 0, i8 ); + cavlc_mb8x8_mvd( h, &s, 1, i8 ); + } + else + { + x264_log(h, X264_LOG_ERROR, "invalid/unhandled mb_type\n" ); + return 0; + } + + for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- ) + { + x264_macroblock_luma_write_cavlc( h, &s, i8, i8 ); + + block_residual_write_cavlc( h, &s, i8, h->dct.block[16+i8 ].residual_ac, 15 ); + block_residual_write_cavlc( h, &s, i8+4, h->dct.block[16+i8+4].residual_ac, 15 ); + + i8 += x264_pixel_size[i_pixel].h >> 3; + } + + return s.i_bits_encoded; +} + +static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode ) +{ + if( x264_mb_predict_intra4x4_mode( h, i4 ) == x264_mb_pred_mode4x4_fix( i_mode ) ) + return 1; + else + return 4; +} + +static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode ) +{ + int i4, i; + h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode ); + for( i4 = 0; i4 < 4; i4++ ) + { + for( i = 0; i < 16; i++ ) + h->dct.block[i4+i8*4].luma4x4[i] = h->dct.luma8x8[i8][i4+i*4]; + h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = + array_non_zero_count( h->dct.block[i4+i8*4].luma4x4, 16 ); + block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 ); + } + return h->out.bs.i_bits_encoded; +} + +static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode ) +{ + h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode ); + block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.block[i4].luma4x4, 16 ); + return h->out.bs.i_bits_encoded; +} +#endif diff --git a/encoder/encoder.c b/encoder/encoder.c index 97f6e363..59a049c8 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -357,6 +357,7 @@ static int x264_validate_parameters( x264_t *h ) h->param.analyse.i_trellis = 0; h->param.analyse.b_fast_pskip = 0; h->param.analyse.i_noise_reduction = 0; 
+ h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 ); } if( ( h->param.i_width % 16 || h->param.i_height % 16 ) && !h->mb.b_lossless ) @@ -394,7 +395,7 @@ static int x264_validate_parameters( x264_t *h ) h->param.analyse.i_me_range = 4; if( h->param.analyse.i_me_range > 16 && h->param.analyse.i_me_method <= X264_ME_HEX ) h->param.analyse.i_me_range = 16; - h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 ); + h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 7 ); h->param.analyse.b_bframe_rdo = h->param.analyse.b_bframe_rdo && h->param.analyse.i_subpel_refine >= 6; h->param.analyse.b_mixed_references = h->param.analyse.b_mixed_references && h->param.i_frame_reference > 1; h->param.analyse.inter &= X264_ANALYSE_PSUB16x16|X264_ANALYSE_PSUB8x8|X264_ANALYSE_BSUB16x16| diff --git a/encoder/macroblock.c b/encoder/macroblock.c index 3cc6716a..15220e38 100644 --- a/encoder/macroblock.c +++ b/encoder/macroblock.c @@ -849,3 +849,88 @@ void x264_denoise_dct( x264_t *h, int16_t *dct ) } } } + +/***************************************************************************** + * RD only; 4 calls to this do not make up for one macroblock_encode. + * doesn't transform chroma dc. 
+ *****************************************************************************/ +void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) +{ + int i_qp = h->mb.i_qp; + uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE; + uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE; + int i_decimate_8x8 = 0; + int nnz8x8 = 1; + int ch; + + x264_mb_mc_8x8( h, i8 ); + + if( h->mb.b_transform_8x8 ) + { + int16_t dct8x8[8][8]; + h->dctf.sub8x8_dct8( dct8x8, p_fenc, FENC_STRIDE, p_fdec, FDEC_STRIDE ); + + quant_8x8( h, dct8x8, h->quant8_mf[CQM_8PY], i_qp, 0 ); + scan_zigzag_8x8full( h->dct.luma8x8[i8], dct8x8 ); + i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[i8], 64 ); + + if( i_decimate_8x8 < 4 ) + { + memset( h->dct.luma8x8[i8], 0, sizeof(h->dct.luma8x8[i8]) ); + nnz8x8 = 0; + } + if( nnz8x8 ) + { + h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp ); + h->dctf.add8x8_idct8( p_fdec, FDEC_STRIDE, dct8x8 ); + } + } + else + { + int i4, idx; + int16_t dct4x4[4][4][4]; + h->dctf.sub8x8_dct( dct4x4, p_fenc, FENC_STRIDE, p_fdec, FDEC_STRIDE ); + + for( i4 = 0; i4 < 4; i4++ ) + { + idx = i8 * 4 + i4; + + quant_4x4( h, dct4x4[i4], h->quant4_mf[CQM_4PY], i_qp, 0 ); + scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[i4] ); + i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 ); + } + + if( i_decimate_8x8 < 4 ) + { + memset( &h->dct.block[i8*4], 0, 4 * sizeof(*h->dct.block) ); + nnz8x8 = 0; + } + if( nnz8x8 ) + { + for( i4 = 0; i4 < 4; i4++ ) + h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp ); + h->dctf.add8x8_idct( p_fdec, FDEC_STRIDE, dct4x4 ); + } + } + + i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )]; + + for( ch = 0; ch < 2; ch++ ) + { + int16_t dct4x4[4][4]; + p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE; + p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE; + + h->dctf.sub4x4_dct( dct4x4, p_fenc, 
FENC_STRIDE, p_fdec, FDEC_STRIDE ); + quant_4x4( h, dct4x4, h->quant4_mf[CQM_4PC], i_qp, 0 ); + scan_zigzag_4x4( h->dct.block[16+i8+ch*4].residual_ac, dct4x4 ); + h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp ); + h->dctf.add4x4_idct( p_fdec, FDEC_STRIDE, dct4x4 ); + } + + if( nnz8x8 ) + h->mb.i_cbp_luma |= (1 << i8); + else + h->mb.i_cbp_luma &= ~(1 << i8); + h->mb.i_cbp_chroma = 0x02; +} diff --git a/encoder/macroblock.h b/encoder/macroblock.h index f856fd3b..e324bbb0 100644 --- a/encoder/macroblock.h +++ b/encoder/macroblock.h @@ -39,6 +39,8 @@ void x264_macroblock_encode ( x264_t *h ); void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb ); void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s ); +void x264_macroblock_encode_p8x8( x264_t *h, int i8 ); + void x264_cabac_mb_skip( x264_t *h, int b_skip ); void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat, diff --git a/encoder/me.c b/encoder/me.c index 822baa0c..66bfdfc9 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -39,6 +39,7 @@ static const int subpel_iterations[][4] = {0,2,1,0}, {0,2,1,1}, {0,2,1,2}, + {0,0,2,2}, {0,0,2,2}}; static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel ); @@ -712,3 +713,98 @@ int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight m1->mv[1] = bm1y; return bcost; } + +#define COST_MV_RD( mx, my, dir ) /* Cost one candidate mv (mx,my): write it into the mv cache at cache_mv and cache_mv2, call x264_rd_cost_part(), and keep it in bcost/bmx/bmy if it is cheaper. dir<0 bypasses the p_visited map; dir>=0 candidates are skipped when already marked and are marked once evaluated. Relies on locals of the expanding function (h, m, i8, i_lambda2, odir, p_visited, ...). NOTE(review): nothing here assigns bdir, so odir stays -1 in the caller and the (dir^1) != odir test never rejects a candidate. */ \ +{ \ + if( (dir^1) != odir && (dir<0 || !p_visited[(mx)+(my)*16]) ) \ + { \ + int cost; \ + cache_mv[0] = cache_mv2[0] = mx; \ + cache_mv[1] = cache_mv2[1] = my; \ + cost = x264_rd_cost_part( h, i_lambda2, i8, m->i_pixel ); \ + if( cost < bcost ) \ + { \ + bcost = cost; \ + bmx = mx; \ + bmy = my; \ + } \ + if(dir>=0) p_visited[(mx)+(my)*16] = 1; \ + } \ +} + +void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 ) /* RD-based subpel refinement of m->mv for the partition whose first 8x8 block is i8. Candidates are costed with COST_MV_RD: first the current mv (non-16x16 only; 16x16 starts from m->cost) and the predictor m->mvp when it differs, then up to two rounds of steps of 2 (hpel) and up to two rounds of steps of 1 (qpel), each round stopping early if the best mv did not move. The winner is written to m->cost, m->mv and the mv/mvd caches. */ +{ + // don't have to fill the whole mv cache rectangle + static const int pixel_mv_offs[] = { 0, 4, 4*8, 0 
}; + int16_t *cache_mv = h->mb.cache.mv[0][x264_scan8[i8*4]]; + int16_t *cache_mv2 = cache_mv + pixel_mv_offs[m->i_pixel]; + const int bw = x264_pixel_size[m->i_pixel].w>>2; + const int bh = x264_pixel_size[m->i_pixel].h>>2; + + int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX; + int bmx = m->mv[0]; + int bmy = m->mv[1]; + int omx, omy, i; + int odir = -1, bdir; + + int visited[16*13] = {0}; // only need 13x13, but 16 is more convenient + int *p_visited = &visited[6+6*16]; + + if( m->i_pixel != PIXEL_16x16 ) + { + COST_MV_RD( bmx, bmy, -1 ); + x264_mb_predict_mv( h, 0, i8*4, bw, m->mvp ); + } + + /* check the predicted mv */ + if( bmx != m->mvp[0] || bmy != m->mvp[1] ) + COST_MV_RD( m->mvp[0], m->mvp[1], -1 ); + + /* mark mv and mvp as visited */ + p_visited[0] = 1; + p_visited -= bmx + bmy*16; /* rebase so the map is indexed by absolute (mx,my) with the current best at the centre. NOTE(review): the rebased pointer can lie outside visited[], which is formally undefined pointer arithmetic in ISO C even though the elements actually accessed appear to stay in range (at most 6 away per axis). */ + { + int mx = bmx ^ m->mv[0] ^ m->mvp[0]; /* bmx equals one of mv[0] and mvp[0], so the xor yields the other one (or the same value when they coincide) */ + int my = bmy ^ m->mv[1] ^ m->mvp[1]; + if( abs(mx-bmx) < 7 && abs(my-bmy) < 7 ) + p_visited[mx + my*16] = 1; + } + + /* hpel */ + bdir = -1; + for( i = 0; i < 2; i++ ) + { + omx = bmx; + omy = bmy; + odir = bdir; + COST_MV_RD( omx, omy - 2, 0 ); + COST_MV_RD( omx, omy + 2, 1 ); + COST_MV_RD( omx - 2, omy, 2 ); + COST_MV_RD( omx + 2, omy, 3 ); + if( bmx == omx && bmy == omy ) + break; + } + + /* qpel */ + bdir = -1; + for( i = 0; i < 2; i++ ) + { + omx = bmx; + omy = bmy; + odir = bdir; + COST_MV_RD( omx, omy - 1, 0 ); + COST_MV_RD( omx, omy + 1, 1 ); + COST_MV_RD( omx - 1, omy, 2 ); + COST_MV_RD( omx + 1, omy, 3 ); + if( bmx == omx && bmy == omy ) + break; + } + + m->cost = bcost; + m->mv[0] = bmx; + m->mv[1] = bmy; + + x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, 0, bmx, bmy ); + x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, 0, bmx - m->mvp[0], bmy - m->mvp[1] ); +} + diff --git a/encoder/me.h b/encoder/me.h index 03678c82..8c640a97 100644 --- a/encoder/me.h +++ b/encoder/me.h @@ -52,6 +52,8 @@ static inline void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i { x264_me_search_ref( h, m, 
mvc, i_mvc, NULL ); } void x264_me_refine_qpel( x264_t *h, x264_me_t *m ); +void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 ); int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ); +int x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel ); #endif diff --git a/encoder/rdo.c b/encoder/rdo.c index ba1397d6..480d5ec4 100644 --- a/encoder/rdo.c +++ b/encoder/rdo.c @@ -57,6 +57,12 @@ static int ssd_mb( x264_t *h ) h->mb.pic.p_fdec[2], FDEC_STRIDE ); } +static int ssd_plane( x264_t *h, int size, int p, int x, int y ) /* SSD between the fenc and fdec buffers of plane p at pixel offset (x,y), computed by h->pixf.ssd[size]. */ +{ + return h->pixf.ssd[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE, + h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE ); +} + static int x264_rd_cost_mb( x264_t *h, int i_lambda2 ) { int b_transform_bak = h->mb.b_transform_8x8; @@ -91,6 +97,86 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 ) return i_ssd + i_bits; } +int x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel ) /* RD cost (SSD plus lambda2-scaled bits) of one inter partition. PIXEL_16x16 defers to x264_rd_cost_mb() and restores h->mb.i_type afterwards. Otherwise 8x8 block i8 is encoded (plus i8+1 for 16x8, i8+2 for 8x16), SSD is summed over plane 0 at size i_pixel and planes 1 and 2 at size index i_pixel+3 (presumably the matching half-size entry; confirm against the PIXEL_* enum), and bits come from x264_partition_size_cabac() run on a local copy of h->cabac or from x264_partition_size_cavlc(). */ +{ + int i_ssd, i_bits; + + if( i_pixel == PIXEL_16x16 ) + { + int type_bak = h->mb.i_type; + int i_cost = x264_rd_cost_mb( h, i_lambda2 ); + h->mb.i_type = type_bak; + return i_cost; + } + + x264_macroblock_encode_p8x8( h, i8 ); + if( i_pixel == PIXEL_16x8 ) + x264_macroblock_encode_p8x8( h, i8+1 ); + if( i_pixel == PIXEL_8x16 ) + x264_macroblock_encode_p8x8( h, i8+2 ); + + i_ssd = ssd_plane( h, i_pixel, 0, (i8&1)*8, (i8>>1)*8 ) + + ssd_plane( h, i_pixel+3, 1, (i8&1)*4, (i8>>1)*4 ) + + ssd_plane( h, i_pixel+3, 2, (i8&1)*4, (i8>>1)*4 ); + + if( h->param.b_cabac ) + { + x264_cabac_t cabac_tmp = h->cabac; + cabac_tmp.f8_bits_encoded = 0; + x264_partition_size_cabac( h, &cabac_tmp, i8, i_pixel ); + i_bits = ( cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; + } + else + { + i_bits = x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2; + } + + return i_ssd + i_bits; +} + +int x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode ) /* RD cost of 8x8 block i8: encodes it with x264_mb_encode_i8x8() at h->mb.i_qp, takes the plane 0 SSD of that block, and adds the lambda2-scaled size reported by the i8x8 partition size function (CABAC on a local copy of h->cabac, or CAVLC) for mode i_mode. */ +{ + int i_ssd, i_bits; + + 
x264_mb_encode_i8x8( h, i8, h->mb.i_qp ); + i_ssd = ssd_plane( h, PIXEL_8x8, 0, (i8&1)*8, (i8>>1)*8 ); + + if( h->param.b_cabac ) + { + x264_cabac_t cabac_tmp = h->cabac; + cabac_tmp.f8_bits_encoded = 0; + x264_partition_i8x8_size_cabac( h, &cabac_tmp, i8, i_mode ); + i_bits = ( cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; + } + else + { + i_bits = x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2; + } + + return i_ssd + i_bits; +} + +int x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode ) /* RD cost of 4x4 block i4: encodes it with x264_mb_encode_i4x4() at h->mb.i_qp, takes the plane 0 SSD at the position given by block_idx_x/block_idx_y, and adds the lambda2-scaled size reported by the i4x4 partition size function (CABAC on a local copy of h->cabac, or CAVLC) for mode i_mode. */ +{ + int i_ssd, i_bits; + + x264_mb_encode_i4x4( h, i4, h->mb.i_qp ); + i_ssd = ssd_plane( h, PIXEL_4x4, 0, block_idx_x[i4]*4, block_idx_y[i4]*4 ); + + if( h->param.b_cabac ) + { + x264_cabac_t cabac_tmp = h->cabac; + cabac_tmp.f8_bits_encoded = 0; + x264_partition_i4x4_size_cabac( h, &cabac_tmp, i4, i_mode ); + i_bits = ( cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; + } + else + { + i_bits = x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2; + } + + return i_ssd + i_bits; +} /**************************************************************************** * Trellis RD quantization diff --git a/x264.c b/x264.c index 10f40415..eb277697 100644 --- a/x264.c +++ b/x264.c @@ -209,7 +209,7 @@ static void Help( x264_param_t *defaults ) " - esa: exhaustive search (slow)\n" " --merange Maximum motion vector search range [%d]\n" " -m, --subme Subpixel motion estimation and partition\n" - " decision quality: 1=fast, 6=best. [%d]\n" + " decision quality: 1=fast, 7=best. [%d]\n" " --b-rdo RD based mode decision for B-frames. Requires subme 6.\n" " --mixed-refs Decide references on a per partition basis\n" " --no-chroma-me Ignore chroma in motion estimation\n" -- 2.40.0