}
h->dctf.dct4x4dc( dct_dc4x4 );
- h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
+ if( h->mb.b_trellis )
+ x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1);
+ else
+ h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
/* output samples to fdec */
dct2x2[i>>1][i&1] = dct4x4[i][0][0];
dct4x4[i][0][0] = 0;
- /* no trellis; it doesn't seem to help chroma noticeably */
- h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
+ if( h->mb.b_trellis )
+ x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 0 );
+ else
+ h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
if( b_decimate )
}
h->dctf.dct2x2dc( dct2x2 );
- h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
+ if( h->mb.b_trellis )
+ x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter );
+ else
+ h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
/* output samples to fdec */
p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
- h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
+ dct4x4[0][0] = 0;
+
+ if( h->mb.b_trellis )
+ x264_quant_4x4_trellis( h, dct4x4, CQM_4PC, i_qp, DCT_CHROMA_AC, 0, 0 );
+ else
+ h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
+
h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
- h->dct.luma4x4[16+i8+ch*4][0] = 0;
if( array_non_zero( dct4x4 ) )
{
h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
} trellis_node_t;
// TODO:
-// support chroma and i16x16 DC
// save cabac state between blocks?
// use trellis' RD score instead of x264_mb_decimate_score?
// code 8x8 sig/last flags forwards with deadzone and save the contexts at
// comparable to the input. so unquant is the direct inverse of quant,
// and uses the dct scaling factors, not the idct ones.
-static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
+static ALWAYS_INLINE void quant_trellis_cabac( x264_t *h, int16_t *dct,
const uint16_t *quant_mf, const int *unquant_mf,
const int *coef_weight, const uint8_t *zigzag,
- int i_ctxBlockCat, int i_lambda2, int b_ac, int i_coefs, int idx )
+ int i_ctxBlockCat, int i_lambda2, int b_ac, int dc, int i_coefs, int idx )
{
int abs_coefs[64], signs[64];
trellis_node_t nodes[2][8];
/* init coefs */
for( i = i_coefs-1; i >= b_ac; i-- )
- if( (unsigned)(dct[zigzag[i]] * quant_mf[zigzag[i]] + f-1) >= 2*f )
+ if( (unsigned)(dct[zigzag[i]] * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) + f-1) >= 2*f )
break;
if( i < b_ac )
cabac_state_last[i] = ctx_last[ last_coeff_flag_offset_8x8[i] ];
}
}
- else
+ else if( !dc || i_ctxBlockCat != DCT_CHROMA_DC )
{
memcpy( cabac_state_sig, &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 15 );
memcpy( cabac_state_last, &h->cabac.state[ last_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 15 );
}
+ else
+ {
+ memcpy( cabac_state_sig, &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 3 );
+ memcpy( cabac_state_last, &h->cabac.state[ last_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 3 );
+ }
memcpy( nodes_cur[0].cabac_state, &h->cabac.state[ coeff_abs_level_m1_offset[i_ctxBlockCat] ], 10 );
for( i = i_last_nnz; i >= b_ac; i-- )
{
int i_coef = abs_coefs[i];
- int q = ( f + i_coef * quant_mf[zigzag[i]] ) >> 16;
+ int q = ( f + i_coef * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) ) >> 16;
int abs_level;
int cost_sig[2], cost_last[2];
trellis_node_t n;
// that are better left coded, especially at QP > 40.
for( abs_level = q; abs_level >= q-1; abs_level-- )
{
- int unquant_abs_level = ((unquant_mf[zigzag[i]] * abs_level + 128) >> 8);
+ int unquant_abs_level = (((dc?unquant_mf[0]<<1:unquant_mf[zigzag[i]]) * abs_level + 128) >> 8);
int d = i_coef - unquant_abs_level;
int64_t ssd;
/* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
- if( h->mb.i_psy_trellis && i )
+ if( h->mb.i_psy_trellis && i && !dc && i_ctxBlockCat != DCT_CHROMA_AC )
{
int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][i] : h->mb.pic.fenc_dct4[idx][i];
int predicted_coef = orig_coef - i_coef * signs[i];
ssd = (int64_t)d*d * coef_weight[i] - psy_weight * psy_value;
}
else
- ssd = (int64_t)d*d * coef_weight[i];
+ /* FIXME: for i16x16 dc is this weight optimal? */
+ ssd = (int64_t)d*d * (dc?256:coef_weight[i]);
for( j = 0; j < 8; j++ )
{
}
}
+const static uint8_t x264_zigzag_scan2[4] = {0,1,2,3};
+
+void x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat,
+ int i_qp, int i_ctxBlockCat, int b_intra )
+{
+ quant_trellis_cabac( h, (int16_t*)dct,
+ h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
+ NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
+ i_ctxBlockCat, lambda2_tab[b_intra][i_qp], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
+}
void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
- int b_ac = (i_ctxBlockCat == DCT_LUMA_AC);
+ int b_ac = (i_ctxBlockCat == DCT_LUMA_AC || i_ctxBlockCat == DCT_CHROMA_AC);
quant_trellis_cabac( h, (int16_t*)dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
x264_dct4_weight2_zigzag[h->mb.b_interlaced],
x264_zigzag_scan4[h->mb.b_interlaced],
- i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 16, idx );
+ i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 0, 16, idx );
}
-
void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
int i_qp, int b_intra, int idx )
{
h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
x264_dct8_weight2_zigzag[h->mb.b_interlaced],
x264_zigzag_scan8[h->mb.b_interlaced],
- DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 64, idx );
+ DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 0, 64, idx );
}