DECLARE_ALIGNED_16( int16_t dct_dc4x4[4][4] );
int i, nz;
+ int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
+ int decimate_score = b_decimate ? 0 : 9;
if( h->mb.b_lossless )
{
{
h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
+ if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[i] );
h->mb.i_cbp_luma = 0xf;
}
}
+ /* Writing the 16 CBFs in an i16x16 block is quite costly, so decimation can save many bits. */
+ /* More useful with CAVLC, but still useful with CABAC. */
+ if( decimate_score < 6 )
+ {
+ h->mb.i_cbp_luma = 0;
+ *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
+ *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
+ *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
+ *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
+ }
+
h->dctf.dct4x4dc( dct_dc4x4 );
if( h->mb.b_trellis )
nz = x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1);