From: Fiona Glaser Date: Thu, 10 Jul 2008 14:36:45 +0000 (-0600) Subject: Fix and enable I_PCM macroblock support X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6b4ad5f53899a3eafff4307e98fae18998677568;p=libx264 Fix and enable I_PCM macroblock support In RD mode, always consider PCM as a macroblock mode possibility Fix bitstream writing for PCM blocks in CAVLC and CABAC, and a few other minor changes to make PCM work. PCM macroblocks improve compression at very low QPs (1-5) and in lossless mode. --- diff --git a/common/common.h b/common/common.h index 4095b8d7..e2792cc8 100644 --- a/common/common.h +++ b/common/common.h @@ -50,6 +50,7 @@ #define X264_THREAD_MAX 128 #define X264_SLICE_MAX 4 #define X264_NAL_MAX (4 + X264_SLICE_MAX) +#define X264_PCM_COST (386*8) // number of pixels (per thread) in progress at any given time. // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety diff --git a/common/macroblock.c b/common/macroblock.c index 88a7f454..b5e3ebc6 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -1399,7 +1399,7 @@ void x264_macroblock_cache_save( x264_t *h ) if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 ) h->mb.i_qp = h->mb.i_last_qp; - h->mb.qp[i_mb_xy] = h->mb.i_qp; + h->mb.qp[i_mb_xy] = i_mb_type != I_PCM ? h->mb.i_qp : 0; h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp; h->mb.i_last_qp = h->mb.i_qp; @@ -1418,7 +1418,10 @@ void x264_macroblock_cache_save( x264_t *h ) if( i_mb_type == I_PCM ) { + h->mb.i_cbp_chroma = 2; + h->mb.i_cbp_luma = 0xf; h->mb.cbp[i_mb_xy] = 0x72f; /* all set */ + h->mb.b_transform_8x8 = 0; for( i = 0; i < 16 + 2*4; i++ ) non_zero_count[i] = 16; } diff --git a/common/macroblock.h b/common/macroblock.h index 9d9b2223..aff9240e 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -55,7 +55,7 @@ static const uint8_t x264_pred_i4x4_neighbors[12] = /* XXX mb_type isn't the one written in the bitstream -> only internal usage */ -#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_8x8 || (type) == I_16x16 ) +#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_8x8 || (type) == I_16x16 || (type) == I_PCM ) #define IS_SKIP(type) ( (type) == P_SKIP || (type) == B_SKIP ) #define IS_DIRECT(type) ( (type) == B_DIRECT ) enum mb_class_e diff --git a/encoder/analyse.c b/encoder/analyse.c index d22412c9..270b90ae 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -96,6 +96,8 @@ typedef struct int i_satd_i4x4; int i_predict4x4[16]; + int i_satd_pcm; + /* Chroma part */ int i_satd_i8x8chroma; int i_satd_i8x8chroma_dir[4]; @@ -223,6 +225,9 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) a->i_satd_i4x4 = a->i_satd_i8x8chroma = COST_MAX; + /* non-RD PCM decision is inaccurate, so don't do it */ + a->i_satd_pcm = a->b_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX; + a->b_fast_intra = 0; h->mb.i_skip_intra = h->mb.b_lossless ? 0 : @@ -2066,15 +2071,12 @@ void x264_macroblock_analyse( x264_t *h ) i_cost = analysis.i_satd_i16x16; h->mb.i_type = I_16x16; - if( analysis.i_satd_i4x4 < i_cost ) - { - i_cost = analysis.i_satd_i4x4; - h->mb.i_type = I_4x4; - } - if( analysis.i_satd_i8x8 < i_cost ) - h->mb.i_type = I_8x8; + COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, h->mb.i_type, I_4x4 ); + COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, h->mb.i_type, I_8x8 ); + if( analysis.i_satd_pcm < i_cost ) + h->mb.i_type = I_PCM; - if( h->mb.i_subpel_refine >= 7 ) + else if( h->mb.i_subpel_refine >= 7 ) x264_intra_rd_refine( h, &analysis ); } else if( h->sh.i_type == SLICE_TYPE_P ) @@ -2285,6 +2287,7 @@ void x264_macroblock_analyse( x264_t *h ) i_intra_cost = analysis.i_satd_i16x16; COPY2_IF_LT( i_intra_cost, analysis.i_satd_i8x8, i_intra_type, I_8x8 ); COPY2_IF_LT( i_intra_cost, analysis.i_satd_i4x4, i_intra_type, I_4x4 ); + COPY2_IF_LT( i_intra_cost, analysis.i_satd_pcm, i_intra_type, I_PCM ); COPY2_IF_LT( i_cost, i_intra_cost, i_type, i_intra_type ); if( i_intra_cost == COST_MAX ) @@ -2295,7 +2298,7 @@ void x264_macroblock_analyse( x264_t *h ) h->stat.frame.i_inter_cost += i_cost; h->stat.frame.i_mbs_analysed++; - if( h->mb.i_subpel_refine >= 7 ) + if( h->mb.i_subpel_refine >= 7 && h->mb.i_type != I_PCM ) { if( IS_INTRA( h->mb.i_type ) ) { @@ -2566,11 +2569,12 @@ void x264_macroblock_analyse( x264_t *h ) COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 ); COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 ); COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 ); + COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM ); h->mb.i_type = i_type; h->mb.i_partition = i_partition; - if( analysis.b_mbrd && h->mb.i_subpel_refine >= 7 && IS_INTRA( i_type ) ) + if( analysis.b_mbrd && h->mb.i_subpel_refine >= 7 && IS_INTRA( i_type ) && i_type != I_PCM ) x264_intra_rd_refine( h, &analysis ); else if( h->param.analyse.b_bidir_me ) refine_bidir( h, &analysis ); @@ -2612,6 +2616,9 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ) x264_mb_analyse_intra_chroma( h, a ); break; + case I_PCM: + break; + case P_L0: switch( h->mb.i_partition ) { diff --git a/encoder/cabac.c b/encoder/cabac.c index 01278528..53ada981 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -543,14 +543,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) if( h->mb.i_neighbour & MB_LEFT ) { i_mba_xy = h->mb.i_mb_xy - 1; - if( h->mb.i_mb_type_left == I_16x16 ) - i_nza = h->mb.cbp[i_mba_xy] & 0x100; + i_nza = h->mb.cbp[i_mba_xy] & 0x100; } if( h->mb.i_neighbour & MB_TOP ) { i_mbb_xy = h->mb.i_mb_top_xy; - if( h->mb.i_mb_type_top == I_16x16 ) - i_nzb = h->mb.cbp[i_mbb_xy] & 0x100; + i_nzb = h->mb.cbp[i_mbb_xy] & 0x100; } } else if( i_cat == DCT_LUMA_AC || i_cat == DCT_LUMA_4x4 ) @@ -785,36 +783,35 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) /* Write the MB type */ x264_cabac_mb_type( h, cb ); - /* PCM special block type UNTESTED */ +#ifndef RDO_SKIP_BS if( i_mb_type == I_PCM ) { -#ifdef RDO_SKIP_BS - cb->f8_bits_encoded += (384*8) << 8; -#else - if( cb->p + 385 >= cb->p_end ) - return; //FIXME throw an error - /* Luma */ - for( i = 0; i < 16; i++ ) - { - memcpy( cb->p, h->fenc->plane[0] + i*h->mb.pic.i_stride[0], 16 ); - cb->p += 16; - } - /* Cb */ + i_mb_pos_tex = x264_cabac_pos( cb ); + h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start; + + memcpy( cb->p, h->mb.pic.p_fenc[0], 256 ); + cb->p += 256; for( i = 0; i < 8; i++ ) - { - memcpy( cb->p, h->fenc->plane[1] + i*h->mb.pic.i_stride[1], 8 ); - cb->p += 8; - } - /* Cr */ + memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 ); + cb->p += 64; for( i = 0; i < 8; i++ ) - { - memcpy( cb->p, h->fenc->plane[2] + i*h->mb.pic.i_stride[2], 8 ); - cb->p += 8; - } - x264_cabac_encode_init( cb, cb->p, cb->p_end ); -#endif + memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 ); + cb->p += 64; + + cb->i_low = 0; + cb->i_range = 0x01FE; + cb->i_queue = -1; + cb->i_bytes_outstanding = 0; + + /* if PCM is chosen, we need to store reconstructed frame data */ + h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 ); + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 ); + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 ); + + h->stat.frame.i_itex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex; return; } +#endif if( IS_INTRA( i_mb_type ) ) { diff --git a/encoder/cavlc.c b/encoder/cavlc.c index 62fabd73..e7bc11ef 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -339,44 +339,39 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) bs_write1( s, h->mb.b_interlaced ); } - /* Write: - - type - - prediction - - mv */ - if( i_mb_type == I_PCM ) +#ifndef RDO_SKIP_BS + if( i_mb_type == I_PCM) { - /* Untested */ bs_write_ue( s, i_mb_i_offset + 25 ); + i_mb_pos_tex = bs_pos( s ); + h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start; -#ifdef RDO_SKIP_BS - s->i_bits_encoded += 384*8; -#else bs_align_0( s ); - /* Luma */ - for( i = 0; i < 16*16; i++ ) - { - const int x = 16 * h->mb.i_mb_x + (i % 16); - const int y = 16 * h->mb.i_mb_y + (i / 16); - bs_write( s, 8, h->fenc->plane[0][y*h->mb.pic.i_stride[0]+x] ); - } - /* Cb */ - for( i = 0; i < 8*8; i++ ) - { - const int x = 8 * h->mb.i_mb_x + (i % 8); - const int y = 8 * h->mb.i_mb_y + (i / 8); - bs_write( s, 8, h->fenc->plane[1][y*h->mb.pic.i_stride[1]+x] ); - } - /* Cr */ - for( i = 0; i < 8*8; i++ ) - { - const int x = 8 * h->mb.i_mb_x + (i % 8); - const int y = 8 * h->mb.i_mb_y + (i / 8); - bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] ); - } -#endif + + memcpy( s->p, h->mb.pic.p_fenc[0], 256 ); + s->p += 256; + for( i = 0; i < 8; i++ ) + memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 ); + s->p += 64; + for( i = 0; i < 8; i++ ) + memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 ); + s->p += 64; + + /* if PCM is chosen, we need to store reconstructed frame data */ + h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 ); + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 ); + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 ); + + h->stat.frame.i_itex_bits += bs_pos(s) - i_mb_pos_tex; return; } - else if( i_mb_type == I_4x4 || i_mb_type == I_8x8 ) +#endif + + /* Write: + - type + - prediction + - mv */ + if( i_mb_type == I_4x4 || i_mb_type == I_8x8 ) { int di = i_mb_type == I_8x8 ? 4 : 1; bs_write_ue( s, i_mb_i_offset + 0 ); diff --git a/encoder/encoder.c b/encoder/encoder.c index 327776e7..2c2fe8cf 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -1841,10 +1841,11 @@ void x264_encoder_close ( x264_t *h ) const int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I]; const double i_count = h->stat.i_slice_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0; x264_log( h, X264_LOG_INFO, - "mb I I16..4: %4.1f%% %4.1f%% %4.1f%%\n", + "mb I I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%%\n", i_mb_count[I_16x16]/ i_count, i_mb_count[I_8x8] / i_count, - i_mb_count[I_4x4] / i_count ); + i_mb_count[I_4x4] / i_count, + i_mb_count[I_PCM] / i_count ); } if( h->stat.i_slice_count[SLICE_TYPE_P] > 0 ) { @@ -1852,10 +1853,11 @@ void x264_encoder_close ( x264_t *h ) const int64_t *i_mb_size = h->stat.i_mb_count_size[SLICE_TYPE_P]; const double i_count = h->stat.i_slice_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0; x264_log( h, X264_LOG_INFO, - "mb P I16..4: %4.1f%% %4.1f%% %4.1f%% P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%% skip:%4.1f%%\n", + "mb P I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%% P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%% skip:%4.1f%%\n", i_mb_count[I_16x16]/ i_count, i_mb_count[I_8x8] / i_count, i_mb_count[I_4x4] / i_count, + i_mb_count[I_PCM] / i_count, i_mb_size[PIXEL_16x16] / (i_count*4), (i_mb_size[PIXEL_16x8] + i_mb_size[PIXEL_8x16]) / (i_count*4), i_mb_size[PIXEL_8x8] / (i_count*4), @@ -1869,10 +1871,11 @@ void x264_encoder_close ( x264_t *h ) const int64_t *i_mb_size = h->stat.i_mb_count_size[SLICE_TYPE_B]; const double i_count = h->stat.i_slice_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0; x264_log( h, X264_LOG_INFO, - "mb B I16..4: %4.1f%% %4.1f%% %4.1f%% B16..8: %4.1f%% %4.1f%% %4.1f%% direct:%4.1f%% skip:%4.1f%%\n", + "mb B I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%% B16..8: %4.1f%% %4.1f%% %4.1f%% direct:%4.1f%% skip:%4.1f%%\n", i_mb_count[I_16x16] / i_count, i_mb_count[I_8x8] / i_count, i_mb_count[I_4x4] / i_count, + i_mb_count[I_PCM] / i_count, i_mb_size[PIXEL_16x16] / (i_count*4), (i_mb_size[PIXEL_16x8] + i_mb_size[PIXEL_8x16]) / (i_count*4), i_mb_size[PIXEL_8x8] / (i_count*4),