int i_intra16x16_pred_mode;
int i_chroma_pred_mode;
+ /* skip flags for i4x4 and i8x8
+ * 0 = encode as normal.
+ * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
+ * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
+ int i_skip_intra;
+
struct
{
/* space for p_fenc and p_fdec */
DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 );
DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 );
+ /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
+ DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 );
+ DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 );
+ DECLARE_ALIGNED( int, i8x8_dct_buf[3][64], 16 );
+ DECLARE_ALIGNED( int, i4x4_dct_buf[15][16], 16 );
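+ /* Note: only 3 of the 4 i8x8 DCTs (15 of the 16 i4x4 DCTs) are saved,
+ * since the last block is re-encoded on the skip path anyway and its
+ * coefficients are regenerated there. */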
+
/* pointers to the current macroblock in the frame to be compressed */
uint8_t *p_fenc[3];
a->i_satd_i8x8chroma = COST_MAX;
a->b_fast_intra = 0;
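+ /* Pick the skip mode for this pass: lossless never skips, RD needs mode 2
+ * since its own encodes overwrite h->dct, and trellis or noise reduction
+ * would change the coefficients after analysis, ruling out the skip. */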
+ h->mb.i_skip_intra =
+ h->mb.b_lossless ? 0 :
+ a->b_mbrd ? 2 :
+ !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;
/* II: Inter part P/B frame */
if( h->sh.i_type != SLICE_TYPE_I )
}
if( idx == 3 )
+ {
a->i_satd_i8x8 = i_cost;
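+ /* All four i8x8 blocks are now reconstructed in fdec; save them (and, in
+ * RD mode, the DCT coefficients) so the encode stage can skip this work. */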
+ if( h->mb.i_skip_intra )
+ {
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
+ }
+ }
else
{
a->i_satd_i8x8 = COST_MAX;
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
}
if( idx == 15 )
+ {
a->i_satd_i4x4 = i_cost;
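+ /* Same caching as the i8x8 case above, for the 16 i4x4 blocks. */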
+ if( h->mb.i_skip_intra )
+ {
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.block, sizeof(h->mb.pic.i4x4_dct_buf) );
+ }
+ }
else
a->i_satd_i4x4 = COST_MAX;
}
int i_max, i_satd, i_best, i_mode, i_thresh;
int i_pred_mode;
int predict_mode[9];
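+ /* This path doesn't keep the cached reconstruction in sync with its
+ * decisions, so make the encode stage do the full work. */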
+ h->mb.i_skip_intra = 0;
if( h->mb.i_type == I_16x16 )
{
h->mb.b_trellis = h->param.analyse.i_trellis;
h->mb.b_noise_reduction = h->param.analyse.i_noise_reduction;
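+ /* trellis == 1 applies trellis only to the final encode, so a DCT cached
+ * during analysis would no longer match it; per-MB noise reduction likewise.
+ * (trellis == 2 was already used during analysis, so the cache stays valid.) */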
+ if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
+ h->mb.i_skip_intra = 0;
}
/*-------------------- Update MB from the analysis ----------------------*/
{
DECLARE_ALIGNED( uint8_t, edge[33], 16 );
h->mb.b_transform_8x8 = 1;
- for( i = 0; i < 4; i++ )
+ /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
+ if( h->mb.i_skip_intra )
+ {
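+ /* mc.copy takes (dst, dst_stride, src, src_stride, height): restore the
+ * cached 16x16 luma reconstruction into fdec. */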
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
+ /* In RD mode, restore the now-overwritten DCT data. */
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
+ }
+ for( i = h->mb.i_skip_intra ? 3 : 0; i < 4; i++ )
{
uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
else if( h->mb.i_type == I_4x4 )
{
h->mb.b_transform_8x8 = 0;
- for( i = 0; i < 16; i++ )
+ /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
+ if( h->mb.i_skip_intra )
+ {
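+ /* Mirror of the i8x8 restore above: bring back the cached fdec and, in
+ * RD mode, the saved i4x4 DCTs. */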
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
+ /* In RD mode, restore the now-overwritten DCT data. */
+ if( h->mb.i_skip_intra == 2 )
+ h->mc.memcpy_aligned( h->dct.block, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
+ }
+ for( i = h->mb.i_skip_intra ? 15 : 0; i < 16; i++ )
{
uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];