#define X264_THREAD_MAX 128
#define X264_SLICE_MAX 4
#define X264_NAL_MAX (4 + X264_SLICE_MAX)
+#define X264_PCM_COST (386*8)
// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
h->mb.i_qp = h->mb.i_last_qp;
- h->mb.qp[i_mb_xy] = h->mb.i_qp;
+ h->mb.qp[i_mb_xy] = i_mb_type != I_PCM ? h->mb.i_qp : 0;
h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp;
h->mb.i_last_qp = h->mb.i_qp;
if( i_mb_type == I_PCM )
{
+ h->mb.i_cbp_chroma = 2; /* with i_cbp_luma below, matches the low bits of the 0x72f stored in cbp[]: 0xf | (2<<4) == 0x2f */
+ h->mb.i_cbp_luma = 0xf;
h->mb.cbp[i_mb_xy] = 0x72f; /* all set */
+ h->mb.b_transform_8x8 = 0; /* the 8x8 transform flag is cleared for PCM macroblocks */
for( i = 0; i < 16 + 2*4; i++ )
non_zero_count[i] = 16;
}
/* XXX mb_type isn't the one written in the bitstream -> only internal usage */
-#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_8x8 || (type) == I_16x16 )
+#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_8x8 || (type) == I_16x16 || (type) == I_PCM )
#define IS_SKIP(type) ( (type) == P_SKIP || (type) == B_SKIP )
#define IS_DIRECT(type) ( (type) == B_DIRECT )
enum mb_class_e
int i_satd_i4x4;
int i_predict4x4[16];
+ int i_satd_pcm;
+
/* Chroma part */
int i_satd_i8x8chroma;
int i_satd_i8x8chroma_dir[4];
a->i_satd_i4x4 =
a->i_satd_i8x8chroma = COST_MAX;
+ /* Only consider PCM when macroblock RD is enabled: the non-RD PCM decision is inaccurate, so its cost is set to COST_MAX in that case */
+ a->i_satd_pcm = a->b_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX;
+
a->b_fast_intra = 0;
h->mb.i_skip_intra =
h->mb.b_lossless ? 0 :
i_cost = analysis.i_satd_i16x16;
h->mb.i_type = I_16x16;
- if( analysis.i_satd_i4x4 < i_cost )
- {
- i_cost = analysis.i_satd_i4x4;
- h->mb.i_type = I_4x4;
- }
- if( analysis.i_satd_i8x8 < i_cost )
- h->mb.i_type = I_8x8;
+ COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, h->mb.i_type, I_4x4 );
+ COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, h->mb.i_type, I_8x8 );
+ if( analysis.i_satd_pcm < i_cost )
+ h->mb.i_type = I_PCM;
- if( h->mb.i_subpel_refine >= 7 )
+ else if( h->mb.i_subpel_refine >= 7 )
x264_intra_rd_refine( h, &analysis );
}
else if( h->sh.i_type == SLICE_TYPE_P )
i_intra_cost = analysis.i_satd_i16x16;
COPY2_IF_LT( i_intra_cost, analysis.i_satd_i8x8, i_intra_type, I_8x8 );
COPY2_IF_LT( i_intra_cost, analysis.i_satd_i4x4, i_intra_type, I_4x4 );
+ COPY2_IF_LT( i_intra_cost, analysis.i_satd_pcm, i_intra_type, I_PCM );
COPY2_IF_LT( i_cost, i_intra_cost, i_type, i_intra_type );
if( i_intra_cost == COST_MAX )
h->stat.frame.i_inter_cost += i_cost;
h->stat.frame.i_mbs_analysed++;
- if( h->mb.i_subpel_refine >= 7 )
+ if( h->mb.i_subpel_refine >= 7 && h->mb.i_type != I_PCM )
{
if( IS_INTRA( h->mb.i_type ) )
{
COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );
COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );
+ COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM );
h->mb.i_type = i_type;
h->mb.i_partition = i_partition;
- if( analysis.b_mbrd && h->mb.i_subpel_refine >= 7 && IS_INTRA( i_type ) )
+ if( analysis.b_mbrd && h->mb.i_subpel_refine >= 7 && IS_INTRA( i_type ) && i_type != I_PCM )
x264_intra_rd_refine( h, &analysis );
else if( h->param.analyse.b_bidir_me )
refine_bidir( h, &analysis );
x264_mb_analyse_intra_chroma( h, a );
break;
+ case I_PCM:
+ break;
+
case P_L0:
switch( h->mb.i_partition )
{
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy - 1;
- if( h->mb.i_mb_type_left == I_16x16 )
- i_nza = h->mb.cbp[i_mba_xy] & 0x100;
+ i_nza = h->mb.cbp[i_mba_xy] & 0x100;
}
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_top_xy;
- if( h->mb.i_mb_type_top == I_16x16 )
- i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
+ i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
}
}
else if( i_cat == DCT_LUMA_AC || i_cat == DCT_LUMA_4x4 )
/* Write the MB type */
x264_cabac_mb_type( h, cb );
- /* PCM special block type UNTESTED */
+#ifndef RDO_SKIP_BS
if( i_mb_type == I_PCM )
{
-#ifdef RDO_SKIP_BS
- cb->f8_bits_encoded += (384*8) << 8;
-#else
- if( cb->p + 385 >= cb->p_end )
- return; //FIXME throw an error
- /* Luma */
- for( i = 0; i < 16; i++ )
- {
- memcpy( cb->p, h->fenc->plane[0] + i*h->mb.pic.i_stride[0], 16 );
- cb->p += 16;
- }
- /* Cb */
+ i_mb_pos_tex = x264_cabac_pos( cb ); /* position after the MB type; everything before it is counted as header bits */
+ h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
+
+ memcpy( cb->p, h->mb.pic.p_fenc[0], 256 ); /* luma in one copy -- NOTE(review): assumes the 16 rows of p_fenc[0] are contiguous (FENC_STRIDE == 16), and the removed cb->p_end bounds check has no replacement here; confirm both */
+ cb->p += 256;
for( i = 0; i < 8; i++ )
- {
- memcpy( cb->p, h->fenc->plane[1] + i*h->mb.pic.i_stride[1], 8 );
- cb->p += 8;
- }
- /* Cr */
+ memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 ); /* Cb: 8 rows of 8 bytes, source rows FENC_STRIDE apart */
+ cb->p += 64;
for( i = 0; i < 8; i++ )
- {
- memcpy( cb->p, h->fenc->plane[2] + i*h->mb.pic.i_stride[2], 8 );
- cb->p += 8;
- }
- x264_cabac_encode_init( cb, cb->p, cb->p_end );
-#endif
+ memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 ); /* Cr: same layout as Cb */
+ cb->p += 64;
+
+ cb->i_low = 0; /* reset the coder state after the raw samples -- NOTE(review): stands in for the removed x264_cabac_encode_init() call; confirm no other field needs resetting */
+ cb->i_range = 0x01FE;
+ cb->i_queue = -1;
+ cb->i_bytes_outstanding = 0;
+
+ /* The PCM samples are written verbatim, so the reconstructed (fdec) macroblock is a plain copy of the source (fenc) pixels */
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
+
+ h->stat.frame.i_itex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex; /* the raw sample bytes are counted as intra texture bits */
return;
}
+#endif
if( IS_INTRA( i_mb_type ) )
{
bs_write1( s, h->mb.b_interlaced );
}
- /* Write:
- - type
- - prediction
- - mv */
- if( i_mb_type == I_PCM )
+#ifndef RDO_SKIP_BS
+ if( i_mb_type == I_PCM)
{
- /* Untested */
bs_write_ue( s, i_mb_i_offset + 25 );
+ i_mb_pos_tex = bs_pos( s ); /* position after the MB type; everything before it is counted as header bits */
+ h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
-#ifdef RDO_SKIP_BS
- s->i_bits_encoded += 384*8;
-#else
bs_align_0( s );
- /* Luma */
- for( i = 0; i < 16*16; i++ )
- {
- const int x = 16 * h->mb.i_mb_x + (i % 16);
- const int y = 16 * h->mb.i_mb_y + (i / 16);
- bs_write( s, 8, h->fenc->plane[0][y*h->mb.pic.i_stride[0]+x] );
- }
- /* Cb */
- for( i = 0; i < 8*8; i++ )
- {
- const int x = 8 * h->mb.i_mb_x + (i % 8);
- const int y = 8 * h->mb.i_mb_y + (i / 8);
- bs_write( s, 8, h->fenc->plane[1][y*h->mb.pic.i_stride[1]+x] );
- }
- /* Cr */
- for( i = 0; i < 8*8; i++ )
- {
- const int x = 8 * h->mb.i_mb_x + (i % 8);
- const int y = 8 * h->mb.i_mb_y + (i / 8);
- bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] );
- }
-#endif
+
+ memcpy( s->p, h->mb.pic.p_fenc[0], 256 ); /* luma in one copy, written through s->p directly -- NOTE(review): assumes bs_align_0() left the writer byte-aligned with nothing pending, that p_fenc[0] rows are contiguous (FENC_STRIDE == 16), and that the buffer has room; confirm */
+ s->p += 256;
+ for( i = 0; i < 8; i++ )
+ memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 ); /* Cb: 8 rows of 8 bytes, source rows FENC_STRIDE apart */
+ s->p += 64;
+ for( i = 0; i < 8; i++ )
+ memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 ); /* Cr: same layout as Cb */
+ s->p += 64;
+
+ /* The PCM samples are written verbatim, so the reconstructed (fdec) macroblock is a plain copy of the source (fenc) pixels */
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
+
+ h->stat.frame.i_itex_bits += bs_pos(s) - i_mb_pos_tex; /* the raw sample bytes are counted as intra texture bits */
return;
}
- else if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
+#endif
+
+ /* Write:
+ - type
+ - prediction
+ - mv */
+ if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
{
int di = i_mb_type == I_8x8 ? 4 : 1;
bs_write_ue( s, i_mb_i_offset + 0 );
const int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
- "mb I I16..4: %4.1f%% %4.1f%% %4.1f%%\n",
+ "mb I I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%%\n",
i_mb_count[I_16x16]/ i_count,
i_mb_count[I_8x8] / i_count,
- i_mb_count[I_4x4] / i_count );
+ i_mb_count[I_4x4] / i_count,
+ i_mb_count[I_PCM] / i_count );
}
if( h->stat.i_slice_count[SLICE_TYPE_P] > 0 )
{
const int64_t *i_mb_size = h->stat.i_mb_count_size[SLICE_TYPE_P];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
- "mb P I16..4: %4.1f%% %4.1f%% %4.1f%% P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%% skip:%4.1f%%\n",
+ "mb P I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%% P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%% skip:%4.1f%%\n",
i_mb_count[I_16x16]/ i_count,
i_mb_count[I_8x8] / i_count,
i_mb_count[I_4x4] / i_count,
+ i_mb_count[I_PCM] / i_count,
i_mb_size[PIXEL_16x16] / (i_count*4),
(i_mb_size[PIXEL_16x8] + i_mb_size[PIXEL_8x16]) / (i_count*4),
i_mb_size[PIXEL_8x8] / (i_count*4),
const int64_t *i_mb_size = h->stat.i_mb_count_size[SLICE_TYPE_B];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
- "mb B I16..4: %4.1f%% %4.1f%% %4.1f%% B16..8: %4.1f%% %4.1f%% %4.1f%% direct:%4.1f%% skip:%4.1f%%\n",
+ "mb B I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%% B16..8: %4.1f%% %4.1f%% %4.1f%% direct:%4.1f%% skip:%4.1f%%\n",
i_mb_count[I_16x16] / i_count,
i_mb_count[I_8x8] / i_count,
i_mb_count[I_4x4] / i_count,
+ i_mb_count[I_PCM] / i_count,
i_mb_size[PIXEL_16x16] / (i_count*4),
(i_mb_size[PIXEL_16x8] + i_mb_size[PIXEL_8x16]) / (i_count*4),
i_mb_size[PIXEL_8x8] / (i_count*4),