SECTION .text
-cglobal predict_8x8_v_mmx
+cglobal predict_8x8c_v_mmx
cglobal predict_16x16_v_mmx
;-----------------------------------------------------------------------------
;
-; void predict_8x8_v_mmx( uint8_t *src, int i_stride )
+; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
;
;-----------------------------------------------------------------------------
ALIGN 16
-predict_8x8_v_mmx :
+predict_8x8c_v_mmx :
movsxd rcx, esi ; i_stride
sub rdi , rcx ; esi <-- line -1
#endif
-static const int x264_cabac_context_init_I[399][2] =
+static const int x264_cabac_context_init_I[460][2] =
{
/* 0 - 10 */
{ 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 },
{ 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 },
{ 36, -1 }, { 34, 5 }, { 32, 11 }, { 35, 5 },
{ 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 },
- { 29, 39 }, { 19, 66 }
+ { 29, 39 }, { 19, 66 },
+
+ /* 399 -> 435 */
+ { 31, 21 }, { 31, 31 }, { 25, 50 },
+ { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 },
+ { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 },
+ { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 },
+ { -23, 68 }, { -24, 50 }, { -11, 74 }, { 23, -13 },
+ { 26, -13 }, { 40, -15 }, { 49, -14 }, { 44, 3 },
+ { 45, 6 }, { 44, 34 }, { 33, 54 }, { 19, 82 },
+ { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 },
+ { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 },
+ { 0, 68 }, { -9, 92 },
+
+ /* 436 -> 459 */
+ { -14, 106 }, { -13, 97 }, { -15, 90 }, { -12, 90 },
+ { -18, 88 }, { -10, 73 }, { -9, 79 }, { -14, 86 },
+ { -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 },
+ { -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 },
+ { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 },
+ { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }
};
-static const int x264_cabac_context_init_PB[3][399][2] =
+static const int x264_cabac_context_init_PB[3][460][2] =
{
/* i_cabac_init_idc == 0 */
{
{ 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 },
{ 11, 86 },
-
+ /* 399 -> 435 */
+ { 12, 40 }, { 11, 51 }, { 14, 59 },
+ { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 },
+ { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 },
+ { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 },
+ { -16, 66 }, { -22, 65 }, { -20, 63 }, { 9, -2 },
+ { 26, -9 }, { 33, -9 }, { 39, -7 }, { 41, -2 },
+ { 45, 3 }, { 49, 9 }, { 45, 27 }, { 36, 59 },
+ { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 },
+ { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 },
+ { -8, 66 }, { -8, 76 },
+
+ /* 436 -> 459 */
+ { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 },
+ { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 },
+ { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 },
+ { -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 },
+ { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 },
+ { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 },
},
/* i_cabac_init_idc == 1 */
{ 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 },
{ 11, 83 },
+ /* 399 -> 435 */
+ { 24, 32 }, { 21, 49 }, { 21, 54 },
+ { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 },
+ { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 },
+ { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 },
+ { -14, 66 }, { 0, 59 }, { 2, 59 }, { 17, -10 },
+ { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
+ { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
+ { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 },
+ { -2, 52 }, { -9, 57 }, { -6, 53 }, { -4, 65 },
+ { -4, 67 }, { -7, 82 },
+
+ /* 436 -> 459 */
+ { -3, 81 }, { -3, 76 }, { -7, 72 }, { -6, 78 },
+ { -12, 72 }, { -14, 68 }, { -3, 70 }, { -6, 76 },
+ { -5, 66 }, { -5, 62 }, { 0, 57 }, { -4, 61 },
+ { -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 },
+ { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
+ { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
},
/* i_cabac_init_idc == 2 */
{ 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 },
{ 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 },
{ 25, 61 },
+
+ /* 399 -> 435 */
+ { 21, 33 }, { 19, 50 }, { 17, 61 },
+ { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 },
+ { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 },
+ { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 },
+ { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 },
+ { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
+ { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
+ { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 },
+ { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 },
+ { -6, 68 }, { -10, 79 },
+
+ /* 436 -> 459 */
+ { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 },
+ { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 },
+ { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 },
+ { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 },
+ { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
+ { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
}
};
*****************************************************************************/
void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model )
{
- const int (*cabac_context_init)[399][2];
+ const int (*cabac_context_init)[460][2];
int i;
if( i_slice_type == SLICE_TYPE_I )
cabac_context_init = &x264_cabac_context_init_PB[i_model];
}
- for( i = 0; i < 399; i++ )
+ for( i = 0; i < 436; i++ )
{
int i_pre_state;
i_cost = 0; /* fix8 */
- for( i_ctx = 0; i_ctx < 399; i_ctx++ )
+ for( i_ctx = 0; i_ctx < 436; i_ctx++ )
{
int i_weight;
int i_model_state;
} slice[3];
/* context */
+ /* states 436-459 are for interlacing, so are omitted for now */
struct
{
int i_state;
int i_mps;
int i_count;
- } ctxstate[399];
+ } ctxstate[436];
/* state */
int i_low;
param->i_log_level = X264_LOG_INFO;
/* */
- param->analyse.intra = X264_ANALYSE_I4x4;
+ param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8;
param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL;
param->analyse.i_me_method = X264_ME_HEX;
{
DECLARE_ALIGNED( int, luma16x16_dc[16], 16 );
DECLARE_ALIGNED( int, chroma_dc[2][4], 16 );
+ // FIXME merge with union
+ DECLARE_ALIGNED( int, luma8x8[4][64], 16 );
union
{
DECLARE_ALIGNED( int, residual_ac[15], 16 );
/* neighboring MBs */
unsigned int i_neighbour;
+ unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */
+ unsigned int i_neighbour4[16]; /* at the time the block is coded */
int i_mb_type_top;
int i_mb_type_left;
int i_mb_type_topleft;
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */
int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
+ int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
/* current value */
int i_type;
int i_partition;
int i_sub_partition[4];
+ int b_transform_8x8;
int i_cbp_luma;
int i_cbp_chroma;
/* cache */
struct
{
- /* real intra4x4_pred_mode if I_4X4, I_PRED_4x4_DC if mb available, -1 if not */
+ /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
int intra4x4_pred_mode[X264_SCAN8_SIZE];
/* i_non_zero_count if availble else 0x80 */
int16_t direct_mv[2][X264_SCAN8_SIZE][2];
int8_t direct_ref[2][X264_SCAN8_SIZE];
+
+ /* top and left neighbors. 1=>8x8, 0=>4x4 */
+ int8_t transform_size[2];
} cache;
/* */
/* ? */
int i_misc_bits;
/* MB type counts */
- int i_mb_count[18];
+ int i_mb_count[19];
int i_mb_count_p;
int i_mb_count_skip;
/* Estimated (SATD) cost as Intra/Predicted frame */
float f_psnr_mean_u[5];
float f_psnr_mean_v[5];
/* */
- int64_t i_mb_count[5][18];
+ int64_t i_mb_count[5][19];
} stat;
/* CPU functions dependants */
x264_predict_t predict_16x16[4+3];
- x264_predict_t predict_8x8[4+3];
+ x264_predict_t predict_8x8c[4+3];
+ x264_predict8x8_t predict_8x8[9+3];
x264_predict_t predict_4x4[9+3];
x264_pixel_function_t pixf;
add8x8_idct( &p_dst[8*i_dst+8], i_dst, &dct[12] );
}
+/****************************************************************************
+ * 8x8 transform:
+ ****************************************************************************/
+
+/* One 1-D pass of the 8x8 integer forward transform (H.264 high profile).
+ * Transforms each row of src and writes the result into the COLUMNS of
+ * dst (dst[k][i] comes from src[i][*]), so two passes
+ * (dct8_1d(d,tmp); dct8_1d(tmp,out)) produce the full 2-D transform. */
+static inline void dct8_1d( int16_t src[8][8], int16_t dst[8][8] )
+{
+ int i;
+ for( i = 0; i < 8; i++ )
+ {
+ /* even part: butterflies on the symmetric sums */
+ const int s07 = src[i][0] + src[i][7];
+ const int s16 = src[i][1] + src[i][6];
+ const int s25 = src[i][2] + src[i][5];
+ const int s34 = src[i][3] + src[i][4];
+
+ const int a0 = s07 + s34;
+ const int a1 = s16 + s25;
+ const int a2 = s07 - s34;
+ const int a3 = s16 - s25;
+
+ /* odd part: differences; >>1 terms implement the 3/2 multipliers
+ * of the integer transform without any true multiplication */
+ const int d07 = src[i][0] - src[i][7];
+ const int d16 = src[i][1] - src[i][6];
+ const int d25 = src[i][2] - src[i][5];
+ const int d34 = src[i][3] - src[i][4];
+
+ const int a4 = d16 + d25 + (d07 + (d07>>1));
+ const int a5 = d07 - d34 - (d25 + (d25>>1));
+ const int a6 = d07 + d34 - (d16 + (d16>>1));
+ const int a7 = d16 - d25 + (d34 + (d34>>1));
+
+ dst[0][i] = a0 + a1;
+ dst[1][i] = a4 + (a7>>2);
+ dst[2][i] = a2 + (a3>>1);
+ dst[3][i] = a5 + (a6>>2);
+ dst[4][i] = a0 - a1;
+ dst[5][i] = a6 - (a5>>2);
+ dst[6][i] = (a2>>1) - a3;
+ dst[7][i] = (a4>>2) - a7;
+ }
+}
+
+/* Forward 8x8 DCT of the residual pix1 - pix2.
+ * pix1/pix2 are 8x8 pixel blocks with row strides i_pix1/i_pix2;
+ * the 2-D transform is done as two 1-D passes (dct8_1d transposes,
+ * so the second pass restores row/column orientation into dct). */
+static void sub8x8_dct8( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+{
+ int16_t d[8][8];
+ int16_t tmp[8][8];
+ int y, x;
+
+ /* residual: per-pixel difference of the two blocks */
+ for( y = 0; y < 8; y++ )
+ {
+ for( x = 0; x < 8; x++ )
+ {
+ d[y][x] = pix1[x] - pix2[x];
+ }
+ pix1 += i_pix1;
+ pix2 += i_pix2;
+ }
+
+ dct8_1d( d, tmp );
+ dct8_1d( tmp, dct );
+}
+
+/* Forward 8x8 DCT of a 16x16 residual: four sub8x8_dct8 calls, one per
+ * quadrant, in raster order (top-left, top-right, bottom-left, bottom-right). */
+static void sub16x16_dct8( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+{
+ sub8x8_dct8( dct[0], pix1, i_pix1, pix2, i_pix2 );
+ sub8x8_dct8( dct[1], &pix1[8], i_pix1, &pix2[8], i_pix2 );
+ sub8x8_dct8( dct[2], &pix1[8*i_pix1], i_pix1, &pix2[8*i_pix2], i_pix2 );
+ sub8x8_dct8( dct[3], &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 );
+}
+
+/* One 1-D pass of the 8x8 integer inverse transform.
+ * Like dct8_1d, it reads rows of src and writes transposed into dst,
+ * so two passes yield the 2-D inverse transform (final descale and
+ * rounding are done by the caller, see add8x8_idct8). */
+static inline void idct8_1d( int16_t src[8][8], int16_t dst[8][8] )
+{
+ int i;
+ for( i = 0; i < 8; i++ )
+ {
+ /* even part */
+ const int a0 = src[i][0] + src[i][4];
+ const int a2 = src[i][0] - src[i][4];
+ const int a4 = (src[i][2]>>1) - src[i][6];
+ const int a6 = (src[i][6]>>1) + src[i][2];
+
+ const int b0 = a0 + a6;
+ const int b2 = a2 + a4;
+ const int b4 = a2 - a4;
+ const int b6 = a0 - a6;
+
+ /* odd part; >>1 terms are the half-sample taps of the inverse kernel */
+ const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1);
+ const int a3 = src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1);
+ const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1);
+ const int a7 = src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1);
+
+ const int b1 = (a7>>2) + a1;
+ const int b3 = a3 + (a5>>2);
+ const int b5 = (a3>>2) - a5;
+ const int b7 = a7 - (a1>>2);
+
+ /* output butterflies, written transposed */
+ dst[0][i] = b0 + b7;
+ dst[7][i] = b0 - b7;
+ dst[1][i] = b2 + b5;
+ dst[6][i] = b2 - b5;
+ dst[2][i] = b4 + b3;
+ dst[5][i] = b4 - b3;
+ dst[3][i] = b6 + b1;
+ dst[4][i] = b6 - b1;
+ }
+}
+
+/* Inverse 8x8 transform of dct, added into the 8x8 destination block.
+ * p_dst has row stride i_dst.  (d + 32) >> 6 is the final descale with
+ * rounding; the reconstructed pixel is clipped to [0,255]. */
+static void add8x8_idct8( uint8_t *p_dst, int i_dst, int16_t dct[8][8] )
+{
+ int16_t d[8][8];
+ int16_t tmp[8][8];
+ int y, x;
+
+ idct8_1d( dct, tmp );
+ idct8_1d( tmp, d );
+
+ for( y = 0; y < 8; y++ )
+ {
+ for( x = 0; x < 8; x++ )
+ {
+ p_dst[x] = clip_uint8( p_dst[x] + ((d[y][x] + 32) >> 6) );
+ }
+ p_dst += i_dst;
+ }
+}
+
+/* Inverse 8x8 transform of a 16x16 block: four add8x8_idct8 calls,
+ * one per quadrant, in raster order. */
+static void add16x16_idct8( uint8_t *p_dst, int i_dst, int16_t dct[4][8][8] )
+{
+ add8x8_idct8( &p_dst[0], i_dst, dct[0] );
+ add8x8_idct8( &p_dst[8], i_dst, dct[1] );
+ add8x8_idct8( &p_dst[8*i_dst], i_dst, dct[2] );
+ add8x8_idct8( &p_dst[8*i_dst+8], i_dst, dct[3] );
+}
/****************************************************************************
dctf->sub8x8_dct = sub8x8_dct;
dctf->add8x8_idct = add8x8_idct;
- dctf->sub16x16_dct = sub16x16_dct;
- dctf->add16x16_idct = add16x16_idct;
+ dctf->sub16x16_dct = sub16x16_dct;
+ dctf->add16x16_idct = add16x16_idct;
+
+ dctf->sub8x8_dct8 = sub8x8_dct8;
+ dctf->add8x8_idct8 = add8x8_idct8;
+
+ dctf->sub16x16_dct8 = sub16x16_dct8;
+ dctf->add16x16_idct8 = add16x16_idct8;
dctf->dct4x4dc = dct4x4dc;
dctf->idct4x4dc = idct4x4dc;
void (*sub16x16_dct) ( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void (*add16x16_idct) ( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
+ void (*sub8x8_dct8) ( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+ void (*add8x8_idct8) ( uint8_t *p_dst, int i_dst, int16_t dct[8][8] );
+
+ void (*sub16x16_dct8) ( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+ void (*add16x16_idct8) ( uint8_t *p_dst, int i_dst, int16_t dct[4][8][8] );
void (*dct4x4dc) ( int16_t d[4][4] );
void (*idct4x4dc)( int16_t d[4][4] );
const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
int i_edge;
int i_dir;
+ const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
+
+ /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
+ * entropy coding, but per 64 coeffs for the purpose of deblocking */
+ if( !h->param.b_cabac && b_8x8_transform )
+ {
+ uint32_t *nnz = (uint32_t*)h->mb.non_zero_count[mb_xy];
+ if( nnz[0] ) nnz[0] = 0x01010101;
+ if( nnz[1] ) nnz[1] = 0x01010101;
+ if( nnz[2] ) nnz[2] = 0x01010101;
+ if( nnz[3] ) nnz[3] = 0x01010101;
+ }
/* i_dir == 0 -> vertical edge
* i_dir == 1 -> horizontal edge */
if( i_dir == 0 )
{
/* vertical edge */
- deblocking_filter_edgev( h, &h->fdec->plane[0][16 * mb_y * h->fdec->i_stride[0]+ 16 * mb_x + 4 * i_edge],
- h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1);
- if( (i_edge % 2) == 0 )
+ if( !b_8x8_transform || !(i_edge & 1) )
+ {
+ deblocking_filter_edgev( h, &h->fdec->plane[0][16 * mb_y * h->fdec->i_stride[0]+ 16 * mb_x + 4 * i_edge],
+ h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1);
+ }
+ if( !(i_edge & 1) )
{
/* U/V planes */
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
else
{
/* horizontal edge */
- deblocking_filter_edgeh( h, &h->fdec->plane[0][(16*mb_y + 4 * i_edge) * h->fdec->i_stride[0]+ 16 * mb_x],
- h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1 );
+ if( !b_8x8_transform || !(i_edge & 1) )
+ {
+ deblocking_filter_edgeh( h, &h->fdec->plane[0][(16*mb_y + 4 * i_edge) * h->fdec->i_stride[0]+ 16 * mb_x],
+ h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1 );
+ }
/* U/V planes */
- if( ( i_edge % 2 ) == 0 )
+ if( !(i_edge & 1) )
{
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
SECTION .text
-cglobal predict_8x8_v_mmx
+cglobal predict_8x8c_v_mmx
cglobal predict_16x16_v_mmx
;-----------------------------------------------------------------------------
;
-; void predict_8x8_v_mmx( uint8_t *src, int i_stride )
+; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
;
;-----------------------------------------------------------------------------
ALIGN 16
-predict_8x8_v_mmx :
+predict_8x8c_v_mmx :
;push edi
;push esi
/****************************************************************************
* 8x8 prediction for intra chroma block DC, H, V, P
****************************************************************************/
-static void predict_8x8_dc_128( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_128( uint8_t *src, int i_stride )
{
int y;
src += i_stride;
}
}
-static void predict_8x8_dc_left( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_left( uint8_t *src, int i_stride )
{
int y;
uint32_t dc0 = 0, dc1 = 0;
}
}
-static void predict_8x8_dc_top( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_top( uint8_t *src, int i_stride )
{
int y, x;
uint32_t dc0 = 0, dc1 = 0;
src += i_stride;
}
}
-static void predict_8x8_dc( uint8_t *src, int i_stride )
+static void predict_8x8c_dc( uint8_t *src, int i_stride )
{
int y;
int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
}
}
-static void predict_8x8_h( uint8_t *src, int i_stride )
+static void predict_8x8c_h( uint8_t *src, int i_stride )
{
int i;
}
}
-extern void predict_8x8_v_mmx( uint8_t *src, int i_stride );
+extern void predict_8x8c_v_mmx( uint8_t *src, int i_stride );
#if 0
-static void predict_8x8_v( uint8_t *src, int i_stride )
+static void predict_8x8c_v( uint8_t *src, int i_stride )
{
int i;
/****************************************************************************
- * 4x4 prediction for intra luma block DC, H, V, P
+ * 4x4 prediction for intra luma block
****************************************************************************/
static void predict_4x4_dc_128( uint8_t *src, int i_stride )
{
pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128;
}
-void x264_predict_8x8_init_mmxext( x264_predict_t pf[7] )
+void x264_predict_8x8c_init_mmxext( x264_predict_t pf[7] )
{
- pf[I_PRED_CHROMA_V ] = predict_8x8_v_mmx;
- pf[I_PRED_CHROMA_H ] = predict_8x8_h;
- pf[I_PRED_CHROMA_DC] = predict_8x8_dc;
- pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8_dc_left;
- pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8_dc_top;
- pf[I_PRED_CHROMA_DC_128 ]= predict_8x8_dc_128;
+ pf[I_PRED_CHROMA_V ] = predict_8x8c_v_mmx;
+ pf[I_PRED_CHROMA_H ] = predict_8x8c_h;
+ pf[I_PRED_CHROMA_DC] = predict_8x8c_dc;
+ pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left;
+ pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top;
+ pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128;
}
void x264_predict_4x4_init_mmxext( x264_predict_t pf[12] )
#define _I386_PREDICT_H 1
void x264_predict_16x16_init_mmxext ( x264_predict_t pf[7] );
-void x264_predict_8x8_init_mmxext ( x264_predict_t pf[7] );
+void x264_predict_8x8c_init_mmxext ( x264_predict_t pf[7] );
void x264_predict_4x4_init_mmxext ( x264_predict_t pf[12] );
#endif
{ {18, 23, 18, 23}, {23, 29, 23, 29}, {18, 23, 18, 23}, {23, 29, 23, 29} }
};
-#if 0
-static const int i_chroma_qp_table[52] =
+static const int dequant8_mf[6][8][8] =
{
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
- 36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
- 39, 39
+ {
+ {20, 19, 25, 19, 20, 19, 25, 19},
+ {19, 18, 24, 18, 19, 18, 24, 18},
+ {25, 24, 32, 24, 25, 24, 32, 24},
+ {19, 18, 24, 18, 19, 18, 24, 18},
+ {20, 19, 25, 19, 20, 19, 25, 19},
+ {19, 18, 24, 18, 19, 18, 24, 18},
+ {25, 24, 32, 24, 25, 24, 32, 24},
+ {19, 18, 24, 18, 19, 18, 24, 18}
+ }, {
+ {22, 21, 28, 21, 22, 21, 28, 21},
+ {21, 19, 26, 19, 21, 19, 26, 19},
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {21, 19, 26, 19, 21, 19, 26, 19},
+ {22, 21, 28, 21, 22, 21, 28, 21},
+ {21, 19, 26, 19, 21, 19, 26, 19},
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {21, 19, 26, 19, 21, 19, 26, 19}
+ }, {
+ {26, 24, 33, 24, 26, 24, 33, 24},
+ {24, 23, 31, 23, 24, 23, 31, 23},
+ {33, 31, 42, 31, 33, 31, 42, 31},
+ {24, 23, 31, 23, 24, 23, 31, 23},
+ {26, 24, 33, 24, 26, 24, 33, 24},
+ {24, 23, 31, 23, 24, 23, 31, 23},
+ {33, 31, 42, 31, 33, 31, 42, 31},
+ {24, 23, 31, 23, 24, 23, 31, 23}
+ }, {
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {26, 25, 33, 25, 26, 25, 33, 25},
+ {35, 33, 45, 33, 35, 33, 45, 33},
+ {26, 25, 33, 25, 26, 25, 33, 25},
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {26, 25, 33, 25, 26, 25, 33, 25},
+ {35, 33, 45, 33, 35, 33, 45, 33},
+ {26, 25, 33, 25, 26, 25, 33, 25}
+ }, {
+ {32, 30, 40, 30, 32, 30, 40, 30},
+ {30, 28, 38, 28, 30, 28, 38, 28},
+ {40, 38, 51, 38, 40, 38, 51, 38},
+ {30, 28, 38, 28, 30, 28, 38, 28},
+ {32, 30, 40, 30, 32, 30, 40, 30},
+ {30, 28, 38, 28, 30, 28, 38, 28},
+ {40, 38, 51, 38, 40, 38, 51, 38},
+ {30, 28, 38, 28, 30, 28, 38, 28}
+ }, {
+ {36, 34, 46, 34, 36, 34, 46, 34},
+ {34, 32, 43, 32, 34, 32, 43, 32},
+ {46, 43, 58, 43, 46, 43, 58, 43},
+ {34, 32, 43, 32, 34, 32, 43, 32},
+ {36, 34, 46, 34, 36, 34, 46, 34},
+ {34, 32, 43, 32, 34, 32, 43, 32},
+ {46, 43, 58, 43, 46, 43, 58, 43},
+ {34, 32, 43, 32, 34, 32, 43, 32}
+ }
};
-#endif
int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
{
const int ma = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 1];
const int mb = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 8];
- const int m = X264_MIN( ma, mb );
+ const int m = X264_MIN( x264_mb_pred_mode4x4_fix(ma),
+ x264_mb_pred_mode4x4_fix(mb) );
if( m < 0 )
return I_PRED_4x4_DC;
return i_ret & 0x7f;
}
+/* Return 1 if the 8x8 transform may be used for this macroblock type.
+ * For P_8x8/B_8x8, every 8x8 partition must itself be coded as a full
+ * 8x8 sub-partition, and a D_DIRECT_8x8 sub-partition is only allowed
+ * when direct_8x8_inference is set in the SPS; B_DIRECT likewise
+ * requires direct_8x8_inference.  All other mb types are allowed. */
+int x264_mb_transform_8x8_allowed( x264_t *h, int i_mb_type )
+{
+ int i;
+ if( i_mb_type == P_8x8 || i_mb_type == B_8x8 )
+ {
+ for( i = 0; i < 4; i++ )
+ if( !IS_SUB8x8(h->mb.i_sub_partition[i])
+ || ( h->mb.i_sub_partition[i] == D_DIRECT_8x8 && !h->sps->b_direct8x8_inference ) )
+ {
+ return 0;
+ }
+ }
+ if( i_mb_type == B_DIRECT && !h->sps->b_direct8x8_inference )
+ return 0;
+
+ return 1;
+}
+
/****************************************************************************
* Scan and Quant functions
****************************************************************************/
}
}
+/* Dequantize an 8x8 coefficient block in place.
+ * dequant8_mf[qp%6] gives the per-position scale factor; qp/6 sets the
+ * shift: for qp >= 12 coefficients are scaled up by << (qp/6 - 2),
+ * otherwise scaled down with rounding by >> (2 - qp/6).
+ * NOTE(review): the up-shifted products are stored back into int16_t;
+ * presumably coefficient magnitudes at high qp are bounded by the
+ * quantizer so this cannot overflow -- confirm against the encoder. */
+void x264_mb_dequant_8x8( int16_t dct[8][8], int i_qscale )
+{
+ const int i_mf = i_qscale%6;
+ int y;
+
+ if( i_qscale >= 12 )
+ {
+ const int i_qbits = (i_qscale/6) - 2;
+ for( y = 0; y < 8; y++ )
+ {
+ dct[y][0] = ( dct[y][0] * dequant8_mf[i_mf][y][0] ) << i_qbits;
+ dct[y][1] = ( dct[y][1] * dequant8_mf[i_mf][y][1] ) << i_qbits;
+ dct[y][2] = ( dct[y][2] * dequant8_mf[i_mf][y][2] ) << i_qbits;
+ dct[y][3] = ( dct[y][3] * dequant8_mf[i_mf][y][3] ) << i_qbits;
+ dct[y][4] = ( dct[y][4] * dequant8_mf[i_mf][y][4] ) << i_qbits;
+ dct[y][5] = ( dct[y][5] * dequant8_mf[i_mf][y][5] ) << i_qbits;
+ dct[y][6] = ( dct[y][6] * dequant8_mf[i_mf][y][6] ) << i_qbits;
+ dct[y][7] = ( dct[y][7] * dequant8_mf[i_mf][y][7] ) << i_qbits;
+ }
+ }
+ else
+ {
+ const int i_qbits = 2 - (i_qscale/6);
+ const int i_round = i_qbits; // equals 1<<(i_qbits-1), since i_qbits is 1 or 2 here
+ for( y = 0; y < 8; y++ )
+ {
+ dct[y][0] = ( dct[y][0] * dequant8_mf[i_mf][y][0] + i_round ) >> i_qbits;
+ dct[y][1] = ( dct[y][1] * dequant8_mf[i_mf][y][1] + i_round ) >> i_qbits;
+ dct[y][2] = ( dct[y][2] * dequant8_mf[i_mf][y][2] + i_round ) >> i_qbits;
+ dct[y][3] = ( dct[y][3] * dequant8_mf[i_mf][y][3] + i_round ) >> i_qbits;
+ dct[y][4] = ( dct[y][4] * dequant8_mf[i_mf][y][4] + i_round ) >> i_qbits;
+ dct[y][5] = ( dct[y][5] * dequant8_mf[i_mf][y][5] + i_round ) >> i_qbits;
+ dct[y][6] = ( dct[y][6] * dequant8_mf[i_mf][y][6] + i_round ) >> i_qbits;
+ dct[y][7] = ( dct[y][7] * dequant8_mf[i_mf][y][7] + i_round ) >> i_qbits;
+ }
+ }
+}
+
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] )
{
const int i8 = x264_scan8[idx];
h->mb.i_b8_stride = h->sps->i_mb_width * 2;
h->mb.i_b4_stride = h->sps->i_mb_width * 4;
- h->mb.qp = x264_malloc( i_mb_count * sizeof( int8_t) );
- h->mb.cbp = x264_malloc( i_mb_count * sizeof( int16_t) );
- h->mb.skipbp = x264_malloc( i_mb_count * sizeof( int8_t) );
+ h->mb.qp = x264_malloc( i_mb_count * sizeof(int8_t) );
+ h->mb.cbp = x264_malloc( i_mb_count * sizeof(int16_t) );
+ h->mb.skipbp = x264_malloc( i_mb_count * sizeof(int8_t) );
+ h->mb.mb_transform_size = x264_malloc( i_mb_count * sizeof(int8_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
h->mb.intra4x4_pred_mode = x264_malloc( i_mb_count * 7 * sizeof( int8_t ) );
}
x264_free( h->mb.intra4x4_pred_mode );
x264_free( h->mb.non_zero_count );
+ x264_free( h->mb.mb_transform_size );
x264_free( h->mb.skipbp );
x264_free( h->mb.cbp );
x264_free( h->mb.qp );
else
h->mb.i_mb_type_topleft = -1;
+ if( h->param.analyse.b_transform_8x8 )
+ {
+ h->mb.cache.transform_size[0] = (h->mb.i_neighbour&MB_LEFT)
+ && h->mb.mb_transform_size[i_left_xy];
+ h->mb.cache.transform_size[1] = (h->mb.i_neighbour&MB_TOP)
+ && h->mb.mb_transform_size[i_top_xy];
+ }
+
/* load ref/mv/mvd */
if( h->sh.i_type != SLICE_TYPE_I )
{
}
}
}
+
+ // FIXME skip this if I_4x4 and I_8x8 are disabled?
+ // assumes MB_TOPRIGHT = MB_TOP<<1
+ h->mb.i_neighbour4[0] =
+ h->mb.i_neighbour8[0] = (h->mb.i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT))
+ | ((h->mb.i_neighbour & MB_TOP) ? MB_TOPRIGHT : 0);
+ h->mb.i_neighbour4[4] =
+ h->mb.i_neighbour4[1] = MB_LEFT | ((h->mb.i_neighbour & MB_TOP) ? (MB_TOP|MB_TOPLEFT|MB_TOPRIGHT) : 0);
+ h->mb.i_neighbour4[2] =
+ h->mb.i_neighbour4[8] =
+ h->mb.i_neighbour4[10] =
+ h->mb.i_neighbour8[2] = MB_TOP|MB_TOPRIGHT | ((h->mb.i_neighbour & MB_LEFT) ? (MB_LEFT|MB_TOPLEFT) : 0);
+ h->mb.i_neighbour4[3] =
+ h->mb.i_neighbour4[7] =
+ h->mb.i_neighbour4[11] =
+ h->mb.i_neighbour4[13] =
+ h->mb.i_neighbour4[15] =
+ h->mb.i_neighbour8[3] = MB_LEFT|MB_TOP|MB_TOPLEFT;
+ h->mb.i_neighbour4[5] =
+ h->mb.i_neighbour8[1] = MB_LEFT | (h->mb.i_neighbour & MB_TOPRIGHT)
+ | ((h->mb.i_neighbour & MB_TOP) ? MB_TOP|MB_TOPLEFT : 0);
+ h->mb.i_neighbour4[6] =
+ h->mb.i_neighbour4[9] =
+ h->mb.i_neighbour4[12] =
+ h->mb.i_neighbour4[14] = MB_LEFT|MB_TOP|MB_TOPLEFT|MB_TOPRIGHT;
}
void x264_macroblock_cache_save( x264_t *h )
{
const int i_mb_xy = h->mb.i_mb_xy;
- const int i_mb_type = h->mb.i_type;
+ const int i_mb_type = x264_mb_type_fix[h->mb.i_type];
const int s8x8 = h->mb.i_b8_stride;
const int s4x4 = h->mb.i_b4_stride;
const int i_mb_4x4 = h->mb.i_b4_xy;
}
}
+ h->mb.mb_transform_size[i_mb_xy] = h->mb.b_transform_8x8;
+
if( !IS_INTRA( i_mb_type ) )
{
int i_list;
/* XXX mb_type isn't the one written in the bitstream -> only internal usage */
-#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_16x16 )
+#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_8x8 || (type) == I_16x16 )
#define IS_SKIP(type) ( (type) == P_SKIP || (type) == B_SKIP )
#define IS_DIRECT(type) ( (type) == B_DIRECT )
enum mb_class_e
{
I_4x4 = 0,
- I_16x16 = 1,
- I_PCM = 2,
-
- P_L0 = 3,
- P_8x8 = 4,
- P_SKIP = 5,
-
- B_DIRECT = 6,
- B_L0_L0 = 7,
- B_L0_L1 = 8,
- B_L0_BI = 9,
- B_L1_L0 = 10,
- B_L1_L1 = 11,
- B_L1_BI = 12,
- B_BI_L0 = 13,
- B_BI_L1 = 14,
- B_BI_BI = 15,
- B_8x8 = 16,
- B_SKIP = 17,
+ I_8x8 = 1,
+ I_16x16 = 2,
+ I_PCM = 3,
+
+ P_L0 = 4,
+ P_8x8 = 5,
+ P_SKIP = 6,
+
+ B_DIRECT = 7,
+ B_L0_L0 = 8,
+ B_L0_L1 = 9,
+ B_L0_BI = 10,
+ B_L1_L0 = 11,
+ B_L1_L1 = 12,
+ B_L1_BI = 13,
+ B_BI_L0 = 14,
+ B_BI_L1 = 15,
+ B_BI_BI = 16,
+ B_8x8 = 17,
+ B_SKIP = 18,
+};
+static const int x264_mb_type_fix[19] =
+{
+ I_4x4, I_4x4, I_16x16, I_PCM,
+ P_L0, P_8x8, P_SKIP,
+ B_DIRECT, B_L0_L0, B_L0_L1, B_L0_BI, B_L1_L0, B_L1_L1,
+ B_L1_BI, B_BI_L0, B_BI_L1, B_BI_BI, B_8x8, B_SKIP
};
-static const int x264_mb_type_list0_table[18][2] =
+static const int x264_mb_type_list0_table[19][2] =
{
- {0,0}, {0,0}, {0,0}, /* INTRA */
+ {0,0}, {0,0}, {0,0}, {0,0}, /* INTRA */
{1,1}, /* P_L0 */
{0,0}, /* P_8x8 */
{1,1}, /* P_SKIP */
{0,0}, /* B_8x8 */
{0,0} /* B_SKIP */
};
-static const int x264_mb_type_list1_table[18][2] =
+static const int x264_mb_type_list1_table[19][2] =
{
- {0,0}, {0,0}, {0,0}, /* INTRA */
+ {0,0}, {0,0}, {0,0}, {0,0}, /* INTRA */
{0,0}, /* P_L0 */
{0,0}, /* P_8x8 */
{0,0}, /* P_SKIP */
void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int i_qscale );
void x264_mb_dequant_2x2_dc( int16_t dct[2][2], int i_qscale );
void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale );
+void x264_mb_dequant_8x8( int16_t dct[8][8], int i_qscale );
/* x264_mb_predict_mv_16x16:
* set mvp with predicted mv for D_16x16 block
int x264_mb_predict_intra4x4_mode( x264_t *h, int idx );
int x264_mb_predict_non_zero_code( x264_t *h, int idx );
+int x264_mb_transform_8x8_allowed( x264_t *h, int i_mb_type );
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale );
+void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale );
void x264_mb_mc( x264_t *h );
}
}
}
+/* Record i_mode as the prediction mode of the 8x8 block at 4x4-unit
+ * position (x,y): fills the 2x2 group of intra4x4_pred_mode cache
+ * entries covering it (the scan8 cache has a row stride of 8). */
+static inline void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode )
+{
+ int *cache = &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y];
+ cache[0] = cache[1] = cache[8] = cache[9] = i_mode;
+}
#endif
#include <stdlib.h>
#include <stdarg.h>
-#include "x264.h"
-#include "predict.h"
+#include "common.h"
+#include "macroblock.h"
#ifdef _MSC_VER
#undef HAVE_MMXEXT /* not finished now */
/****************************************************************************
* 8x8 prediction for intra chroma block DC, H, V, P
****************************************************************************/
-static void predict_8x8_dc_128( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_128( uint8_t *src, int i_stride )
{
int x,y;
src += i_stride;
}
}
-static void predict_8x8_dc_left( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_left( uint8_t *src, int i_stride )
{
int x,y;
int dc0 = 0, dc1 = 0;
src += i_stride;
}
}
-static void predict_8x8_dc_top( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_top( uint8_t *src, int i_stride )
{
int x,y;
int dc0 = 0, dc1 = 0;
src += i_stride;
}
}
-static void predict_8x8_dc( uint8_t *src, int i_stride )
+static void predict_8x8c_dc( uint8_t *src, int i_stride )
{
int x,y;
int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
}
}
-static void predict_8x8_h( uint8_t *src, int i_stride )
+static void predict_8x8c_h( uint8_t *src, int i_stride )
{
int i,j;
src += i_stride;
}
}
-static void predict_8x8_v( uint8_t *src, int i_stride )
+static void predict_8x8c_v( uint8_t *src, int i_stride )
{
int i,j;
}
}
-static void predict_8x8_p( uint8_t *src, int i_stride )
+static void predict_8x8c_p( uint8_t *src, int i_stride )
{
int i;
int x,y;
}
/****************************************************************************
- * 4x4 prediction for intra luma block DC, H, V, P
+ * 4x4 prediction for intra luma block
****************************************************************************/
static void predict_4x4_dc_128( uint8_t *src, int i_stride )
{
src[3*i_stride+3]= l3;
}
+/****************************************************************************
+ * 8x8 prediction for intra luma block
+ ****************************************************************************/
+
+#define SRC(x,y) src[(x)+(y)*i_stride]
+#define PL(y) \
+ const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_LEFT \
+ const int l0 = ((i_neighbor&MB_TOPLEFT ? SRC(-1,-1) : SRC(-1,0)) \
+ + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
+ PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
+ const int l7 = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2;
+
+#define PT(x) \
+ const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOP \
+ const int t0 = ((i_neighbor&MB_TOPLEFT ? SRC(-1,-1) : SRC(0,-1)) \
+ + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
+ PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
+ const int t7 = ((i_neighbor&MB_TOPRIGHT ? SRC(8,-1) : SRC(7,-1)) \
+ + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2; \
+
+#define PTR(x) \
+ t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOPRIGHT \
+ int t8, t9, t10, t11, t12, t13, t14, t15; \
+ if(i_neighbor&MB_TOPRIGHT) { \
+ PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
+ t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
+ } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
+
+#define PREDICT_8x8_LOAD_TOPLEFT \
+ const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2;
+
+#define PREDICT_8x8_DC(v) \
+ int y; \
+ for( y = 0; y < 8; y++ ) { \
+ ((uint32_t*)src)[0] = \
+ ((uint32_t*)src)[1] = v; \
+ src += i_stride; \
+ }
+
+static void predict_8x8_dc_128( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_DC(0x80808080);
+}
+static void predict_8x8_dc_left( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_LEFT;
+ const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
+ PREDICT_8x8_DC(dc);
+}
+static void predict_8x8_dc_top( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_TOP;
+ const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
+ PREDICT_8x8_DC(dc);
+}
+static void predict_8x8_dc( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOP;
+ const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
+ +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
+ PREDICT_8x8_DC(dc);
+}
+static void predict_8x8_h( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_LEFT;
+#define ROW(y) ((uint32_t*)(src+y*i_stride))[0] =\
+ ((uint32_t*)(src+y*i_stride))[1] = 0x01010101U * l##y
+ ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
+#undef ROW
+}
+static void predict_8x8_v( uint8_t *src, int i_stride, int i_neighbor )
+{
+ int y;
+ PREDICT_8x8_LOAD_TOP;
+ src[0] = t0;
+ src[1] = t1;
+ src[2] = t2;
+ src[3] = t3;
+ src[4] = t4;
+ src[5] = t5;
+ src[6] = t6;
+ src[7] = t7;
+ for( y = 1; y < 8; y++ )
+ *(uint64_t*)(src+y*i_stride) = *(uint64_t*)src;
+}
+static void predict_8x8_ddl( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_TOPRIGHT;
+ SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+ SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
+ SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
+ SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
+ SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
+ SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
+ SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
+ SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
+ SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
+ SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
+}
+static void predict_8x8_ddr( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOPLEFT;
+ SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
+ SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+ SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
+ SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+ SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
+ SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+ SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
+ SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
+ SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
+ SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+
+}
+static void predict_8x8_vr( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOPLEFT;
+ /* produce warning as l7 is unused */
+ SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
+ SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+ SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
+ SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+ SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
+ SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+ SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
+ SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
+ SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
+ SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
+ SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
+ SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
+ SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
+ SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
+ SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
+ SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+ SRC(7,0)= (t6 + t7 + 1) >> 1;
+}
+static void predict_8x8_hd( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOPLEFT;
+ /* produce warning as t7 is unused */
+ SRC(0,7)= (l6 + l7 + 1) >> 1;
+ SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
+ SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
+ SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
+ SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
+ SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
+ SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
+ SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
+ SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
+ SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
+ SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
+ SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
+ SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
+ SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
+ SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
+ SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
+ SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
+ SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
+ SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
+ SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
+ SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
+ SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
+}
+static void predict_8x8_vl( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_TOPRIGHT;
+ SRC(0,0)= (t0 + t1 + 1) >> 1;
+ SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
+ SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
+ SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
+ SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
+ SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
+ SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+ SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
+ SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
+ SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
+ SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
+ SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
+ SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
+ SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
+ SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
+ SRC(7,6)= (t10 + t11 + 1) >> 1;
+ SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
+}
+static void predict_8x8_hu( uint8_t *src, int i_stride, int i_neighbor )
+{
+ PREDICT_8x8_LOAD_LEFT;
+ SRC(0,0)= (l0 + l1 + 1) >> 1;
+ SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
+ SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
+ SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
+ SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
+ SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
+ SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
+ SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
+ SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
+ SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
+ SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
+ SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
+ SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
+ SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
+ SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
+ SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
+ SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
+ SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
+}
+
/****************************************************************************
* Exported functions:
****************************************************************************/
#endif
}
-void x264_predict_8x8_init( int cpu, x264_predict_t pf[7] )
+void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
{
- pf[I_PRED_CHROMA_V ] = predict_8x8_v;
- pf[I_PRED_CHROMA_H ] = predict_8x8_h;
- pf[I_PRED_CHROMA_DC] = predict_8x8_dc;
- pf[I_PRED_CHROMA_P ] = predict_8x8_p;
- pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8_dc_left;
- pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8_dc_top;
- pf[I_PRED_CHROMA_DC_128 ]= predict_8x8_dc_128;
+ pf[I_PRED_CHROMA_V ] = predict_8x8c_v;
+ pf[I_PRED_CHROMA_H ] = predict_8x8c_h;
+ pf[I_PRED_CHROMA_DC] = predict_8x8c_dc;
+ pf[I_PRED_CHROMA_P ] = predict_8x8c_p;
+ pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left;
+ pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top;
+ pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128;
#ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMXEXT )
{
- x264_predict_8x8_init_mmxext( pf );
+ x264_predict_8x8c_init_mmxext( pf );
}
#endif
}
+void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12] )
+{
+ pf[I_PRED_8x8_V] = predict_8x8_v;
+ pf[I_PRED_8x8_H] = predict_8x8_h;
+ pf[I_PRED_8x8_DC] = predict_8x8_dc;
+ pf[I_PRED_8x8_DDL] = predict_8x8_ddl;
+ pf[I_PRED_8x8_DDR] = predict_8x8_ddr;
+ pf[I_PRED_8x8_VR] = predict_8x8_vr;
+ pf[I_PRED_8x8_HD] = predict_8x8_hd;
+ pf[I_PRED_8x8_VL] = predict_8x8_vl;
+ pf[I_PRED_8x8_HU] = predict_8x8_hu;
+ pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left;
+ pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top;
+ pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128;
+}
+
void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
{
pf[I_PRED_4x4_V] = predict_4x4_v;
#define _PREDICT_H 1
typedef void (*x264_predict_t)( uint8_t *src, int i_stride );
+typedef void (*x264_predict8x8_t)( uint8_t *src, int i_stride, int i_neighbor );
enum intra_chroma_pred_e
{
I_PRED_CHROMA_DC_TOP = 5,
I_PRED_CHROMA_DC_128 = 6
};
-static const int x264_mb_pred_mode8x8_fix[7] =
+static const int x264_mb_pred_mode8x8c_fix[7] =
{
I_PRED_CHROMA_DC, I_PRED_CHROMA_H, I_PRED_CHROMA_V, I_PRED_CHROMA_P,
I_PRED_CHROMA_DC, I_PRED_CHROMA_DC,I_PRED_CHROMA_DC
I_PRED_4x4_DC_TOP = 10,
I_PRED_4x4_DC_128 = 11,
};
-static const int x264_mb_pred_mode4x4_fix[12] =
+static const int x264_mb_pred_mode4x4_fix[13] =
{
+ -1,
I_PRED_4x4_V, I_PRED_4x4_H, I_PRED_4x4_DC,
I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR,
I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU,
I_PRED_4x4_DC, I_PRED_4x4_DC, I_PRED_4x4_DC
};
+#define x264_mb_pred_mode4x4_fix(t) x264_mb_pred_mode4x4_fix[(t)+1]
+
+/* must use the same numbering as intra4x4_pred_e */
+enum intra8x8_pred_e
+{
+ I_PRED_8x8_V = 0,
+ I_PRED_8x8_H = 1,
+ I_PRED_8x8_DC = 2,
+ I_PRED_8x8_DDL= 3,
+ I_PRED_8x8_DDR= 4,
+ I_PRED_8x8_VR = 5,
+ I_PRED_8x8_HD = 6,
+ I_PRED_8x8_VL = 7,
+ I_PRED_8x8_HU = 8,
+
+ I_PRED_8x8_DC_LEFT = 9,
+ I_PRED_8x8_DC_TOP = 10,
+ I_PRED_8x8_DC_128 = 11,
+};
void x264_predict_16x16_init ( int cpu, x264_predict_t pf[7] );
-void x264_predict_8x8_init ( int cpu, x264_predict_t pf[7] );
+void x264_predict_8x8c_init ( int cpu, x264_predict_t pf[7] );
void x264_predict_4x4_init ( int cpu, x264_predict_t pf[12] );
+void x264_predict_8x8_init ( int cpu, x264_predict8x8_t pf[12] );
#endif
{
PROFILE_BASELINE = 66,
PROFILE_MAIN = 77,
- PROFILE_EXTENTED = 88
+ PROFILE_EXTENTED = 88,
+ PROFILE_HIGH = 100,
+ PROFILE_HIGH10 = 110,
+ PROFILE_HIGH422 = 122,
+ PROFILE_HIGH444 = 144
};
typedef struct
int b_deblocking_filter_control;
int b_constrained_intra_pred;
int b_redundant_pic_cnt;
+
+ int b_transform_8x8_mode;
+
} x264_pps_t;
#endif
static const stringlist_t mb_types[] = {
/* Block types marked as NULL will not be drawn */
{ I_4x4 , "red" },
+ { I_8x8 , "#ff5640" },
{ I_16x16 , "#ff8060" },
{ I_PCM , "violet" },
{ P_L0 , "SlateBlue" },
}
}
- if (v->i_type==I_4x4 || v->i_type==I_16x16 || v->i_type==I_PCM) {
+ if (IS_INTRA(v->i_type) || v->i_type==I_PCM) {
/* Intra coded */
if (v->i_type==I_16x16) {
switch (v->i_intra16x16_pred_mode) {
break;
}
}
- if (v->i_type==I_4x4) {
- for (i=0; i<4; i++) for (j=0; j<4; j++) {
+ if (v->i_type==I_4x4 || v->i_type==I_8x8) {
+ const int di = v->i_type==I_8x8 ? 2 : 1;
+ const int zoom2 = zoom * di;
+ for (i=0; i<4; i+=di) for (j=0; j<4; j+=di) {
const int x0 = x + j*4*zoom;
const int y0 = y + i*4*zoom;
- if (drawbox) disp_rect(0, x0, y0, x0+4*zoom, y0+4*zoom);
+ if (drawbox) disp_rect(0, x0, y0, x0+4*zoom2, y0+4*zoom2);
switch (v->intra4x4_pred_mode[i][j]) {
case I_PRED_4x4_V: /* Vertical */
- disp_line(0, x0+0*zoom, y0+1*zoom, x0+4*zoom, y0+1*zoom);
+ disp_line(0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2);
break;
case I_PRED_4x4_H: /* Horizontal */
- disp_line(0, x0+1*zoom, y0+0*zoom, x0+1*zoom, y0+4*zoom);
+ disp_line(0, x0+1*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2);
break;
case I_PRED_4x4_DC: /* DC, average from top and left sides */
case I_PRED_4x4_DC_LEFT:
case I_PRED_4x4_DC_TOP:
case I_PRED_4x4_DC_128:
- disp_line(0, x0+1*zoom, y0+1*zoom, x0+4*zoom, y0+1*zoom);
- disp_line(0, x0+1*zoom, y0+1*zoom, x0+1*zoom, y0+4*zoom);
+ disp_line(0, x0+1*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2);
+ disp_line(0, x0+1*zoom2, y0+1*zoom2, x0+1*zoom2, y0+4*zoom2);
break;
case I_PRED_4x4_DDL: /* Topright-bottomleft */
- disp_line(0, x0+0*zoom, y0+0*zoom, x0+4*zoom, y0+4*zoom);
+ disp_line(0, x0+0*zoom2, y0+0*zoom2, x0+4*zoom2, y0+4*zoom2);
break;
case I_PRED_4x4_DDR: /* Topleft-bottomright */
- disp_line(0, x0+0*zoom, y0+4*zoom, x0+4*zoom, y0+0*zoom);
+ disp_line(0, x0+0*zoom2, y0+4*zoom2, x0+4*zoom2, y0+0*zoom2);
break;
case I_PRED_4x4_VR: /* Mix of topleft-bottomright and vertical */
- disp_line(0, x0+0*zoom, y0+2*zoom, x0+4*zoom, y0+1*zoom);
+ disp_line(0, x0+0*zoom2, y0+2*zoom2, x0+4*zoom2, y0+1*zoom2);
break;
case I_PRED_4x4_HD: /* Mix of topleft-bottomright and horizontal */
- disp_line(0, x0+2*zoom, y0+0*zoom, x0+1*zoom, y0+4*zoom);
+ disp_line(0, x0+2*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2);
break;
case I_PRED_4x4_VL: /* Mix of topright-bottomleft and vertical */
- disp_line(0, x0+0*zoom, y0+1*zoom, x0+4*zoom, y0+2*zoom);
+ disp_line(0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+2*zoom2);
break;
case I_PRED_4x4_HU: /* Mix of topright-bottomleft and horizontal */
- disp_line(0, x0+1*zoom, y0+0*zoom, x0+2*zoom, y0+4*zoom);
+ disp_line(0, x0+1*zoom2, y0+0*zoom2, x0+2*zoom2, y0+4*zoom2);
break;
}
}
/* Take some shortcuts in intra search if intra is deemed unlikely */
int b_fast_intra;
- /* Luma part 16x16 and 4x4 modes stats */
+ /* Luma part */
int i_sad_i16x16;
int i_predict16x16;
+ int i_sad_i8x8;
+ int i_predict8x8[2][2];
+
int i_sad_i4x4;
int i_predict4x4[4][4];
/* Chroma part */
- int i_sad_i8x8;
- int i_predict8x8;
+ int i_sad_i8x8chroma;
+ int i_predict8x8chroma;
/* II: Inter part P/B frame */
x264_mb_analysis_list_t l0;
};
/* TODO: calculate CABAC costs */
-static const int i_mb_b_cost_table[18] = {
- 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
+static const int i_mb_b_cost_table[19] = {
+ 9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
};
static const int i_mb_b16x8_cost_table[16] = {
0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
&& h->mb.i_subpel_refine >= 5;
a->b_fast_intra = 0;
+ h->mb.b_transform_8x8 = 0;
+
/* I: Intra part */
a->i_sad_i16x16 =
+ a->i_sad_i8x8 =
a->i_sad_i4x4 =
- a->i_sad_i8x8 = COST_MAX;
+ a->i_sad_i8x8chroma = COST_MAX;
/* II: Inter part P/B frame */
if( h->sh.i_type != SLICE_TYPE_I )
|| IS_INTRA( h->mb.i_mb_type_topleft )
|| IS_INTRA( h->mb.i_mb_type_topright )
|| (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] ))
- || (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16])) )
+ || (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) )
{ /* intra is likely */ }
else
{
}
/* Max = 4 */
-static void predict_8x8_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
+static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
{
if( i_neighbour & MB_TOPLEFT )
{
}
}
-/* MAX = 8 */
-static void predict_4x4_mode_available( unsigned int i_neighbour, int idx, int *mode, int *pi_count )
+/* MAX = 9 */
+static void predict_4x4_mode_available( unsigned int i_neighbour,
+ int *mode, int *pi_count )
{
- int b_a, b_b, b_c;
- static const unsigned int needmb[16] =
- {
- MB_LEFT|MB_TOP, MB_TOP,
- MB_LEFT, MB_PRIVATE,
- MB_TOP, MB_TOP|MB_TOPRIGHT,
- 0, MB_PRIVATE,
- MB_LEFT, 0,
- MB_LEFT, MB_PRIVATE,
- 0, MB_PRIVATE,
- 0, MB_PRIVATE
- };
-
- /* FIXME even when b_c == 0 there is some case where missing pixels
+ /* FIXME even when b_tr == 0 there is some case where missing pixels
* are emulated and thus more mode are available TODO
* analysis and encode should be fixed too */
- b_a = (needmb[idx]&i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
- b_b = (needmb[idx]&i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
- b_c = (needmb[idx]&i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
+ int b_l = i_neighbour & MB_LEFT;
+ int b_t = i_neighbour & MB_TOP;
+ int b_tr = i_neighbour & MB_TOPRIGHT;
- if( b_a && b_b )
+ if( b_l && b_t )
{
*mode++ = I_PRED_4x4_DC;
*mode++ = I_PRED_4x4_H;
*mode++ = I_PRED_4x4_VR;
*mode++ = I_PRED_4x4_HD;
*mode++ = I_PRED_4x4_HU;
-
*pi_count = 7;
-
- if( b_c )
- {
- *mode++ = I_PRED_4x4_DDL;
- *mode++ = I_PRED_4x4_VL;
- (*pi_count) += 2;
- }
}
- else if( b_a && !b_b )
+ else if( b_l )
{
*mode++ = I_PRED_4x4_DC_LEFT;
*mode++ = I_PRED_4x4_H;
*mode++ = I_PRED_4x4_HU;
*pi_count = 3;
}
- else if( !b_a && b_b )
+ else if( b_t )
{
*mode++ = I_PRED_4x4_DC_TOP;
*mode++ = I_PRED_4x4_V;
*mode++ = I_PRED_4x4_DC_128;
*pi_count = 1;
}
+
+ if( b_t && b_tr )
+ {
+ *mode++ = I_PRED_4x4_DDL;
+ *mode++ = I_PRED_4x4_VL;
+ (*pi_count) += 2;
+ }
}
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cost_inter )
p_dst_by = p_dst + 4 * x + 4 * y * i_stride;
i_best = COST_MAX;
- predict_4x4_mode_available( h->mb.i_neighbour, idx, predict_mode, &i_max );
+ predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );
for( i = 0; i < i_max; i++ )
{
int i_sad;
}
res->i_sad_i4x4 += i_best;
- /* we need to encode this mb now (for next ones) */
+ /* we need to encode this block now (for next ones) */
h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_stride );
x264_mb_encode_i4x4( h, idx, res->i_qp );
- /* we need to store the 'fixed' version */
- h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] =
- x264_mb_pred_mode4x4_fix[res->i_predict4x4[x][y]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = res->i_predict4x4[x][y];
}
res->i_sad_i4x4 += res->i_lambda * 24; /* from JVT (SATD0) */
if( h->sh.i_type == SLICE_TYPE_B )
res->i_sad_i4x4 += res->i_lambda * i_mb_b_cost_table[I_4x4];
}
+
+ /* 8x8 prediction selection */
+ if( flags & X264_ANALYSE_I8x8 )
+ {
+ res->i_sad_i8x8 = 0;
+ for( idx = 0; idx < 4; idx++ )
+ {
+ uint8_t *p_src_by;
+ uint8_t *p_dst_by;
+ int i_best;
+ int x, y;
+ int i_pred_mode;
+
+ i_pred_mode= x264_mb_predict_intra4x4_mode( h, 4*idx );
+ x = idx&1;
+ y = idx>>1;
+
+ p_src_by = p_src + 8 * x + 8 * y * i_stride;
+ p_dst_by = p_dst + 8 * x + 8 * y * i_stride;
+
+ i_best = COST_MAX;
+ predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max );
+ for( i = 0; i < i_max; i++ )
+ {
+ int i_sad;
+ int i_mode;
+
+ i_mode = predict_mode[i];
+ h->predict_8x8[i_mode]( p_dst_by, i_stride, h->mb.i_neighbour8[idx] ); /* per-block neighbour flags, consistent with mode availability above */
+
+ i_sad = h->pixf.satd[PIXEL_8x8]( p_dst_by, i_stride,
+ p_src_by, i_stride );
+
+ i_sad += res->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4); /* macro form: the fix array is offset by 1 (index 0 holds -1) */
+
+ if( i_best > i_sad )
+ {
+ res->i_predict8x8[x][y] = i_mode;
+ i_best = i_sad;
+ }
+ }
+ res->i_sad_i8x8 += i_best;
+
+ /* we need to encode this block now (for next ones) */
+ h->predict_8x8[res->i_predict8x8[x][y]]( p_dst_by, i_stride, h->mb.i_neighbour8[idx] );
+ x264_mb_encode_i8x8( h, idx, res->i_qp );
+
+ x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, res->i_predict8x8[x][y] ); /* was i_predict4x4: stale/uninitialized when I4x4 analysis is skipped */
+ }
+// res->i_sad_i8x8 += res->i_lambda * something; // FIXME
+ if( h->sh.i_type == SLICE_TYPE_B )
+ res->i_sad_i8x8 += res->i_lambda * i_mb_b_cost_table[I_8x8];
+ }
}
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
uint8_t *p_dstc[2], *p_srcc[2];
int i_stride[2];
- if( res->i_sad_i8x8 < COST_MAX )
+ if( res->i_sad_i8x8chroma < COST_MAX )
return;
/* 8x8 prediction selection for chroma */
i_stride[0] = h->mb.pic.i_stride[1];
i_stride[1] = h->mb.pic.i_stride[2];
- predict_8x8_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
- res->i_sad_i8x8 = COST_MAX;
+ predict_8x8chroma_mode_available( h->mb.i_neighbour, predict_mode, &i_max );
+ res->i_sad_i8x8chroma = COST_MAX;
for( i = 0; i < i_max; i++ )
{
int i_sad;
i_mode = predict_mode[i];
/* we do the prediction */
- h->predict_8x8[i_mode]( p_dstc[0], i_stride[0] );
- h->predict_8x8[i_mode]( p_dstc[1], i_stride[1] );
+ h->predict_8x8c[i_mode]( p_dstc[0], i_stride[0] );
+ h->predict_8x8c[i_mode]( p_dstc[1], i_stride[1] );
/* we calculate the cost */
i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_stride[0],
p_srcc[0], i_stride[0] ) +
h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_stride[1],
p_srcc[1], i_stride[1] ) +
- res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix[i_mode] );
+ res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[i_mode] );
/* if i_score is lower it is better */
- if( res->i_sad_i8x8 > i_sad )
+ if( res->i_sad_i8x8chroma > i_sad )
{
- res->i_predict8x8 = i_mode;
- res->i_sad_i8x8 = i_sad;
+ res->i_predict8x8chroma = i_mode;
+ res->i_sad_i8x8chroma = i_sad;
}
}
+
+ h->mb.i_chroma_pred_mode = res->i_predict8x8chroma;
}
#define LOAD_FENC( m, src, xoff, yoff) \
/*--------------------------- Do the analysis ---------------------------*/
if( h->sh.i_type == SLICE_TYPE_I )
{
+ int i_cost;
x264_mb_analyse_intra( h, &analysis, COST_MAX );
- if( analysis.i_sad_i4x4 < analysis.i_sad_i16x16 )
+ i_cost = analysis.i_sad_i16x16;
+ h->mb.i_type = I_16x16;
+ if( analysis.i_sad_i4x4 < i_cost )
+ {
+ i_cost = analysis.i_sad_i4x4;
h->mb.i_type = I_4x4;
- else
- h->mb.i_type = I_16x16;
+ }
+ if( analysis.i_sad_i8x8 < i_cost )
+ h->mb.i_type = I_8x8;
}
else if( h->sh.i_type == SLICE_TYPE_P )
{
|| ( analysis.i_sad_i4x4 < i_cost )))
{
x264_mb_analyse_intra_chroma( h, &analysis );
- analysis.i_sad_i16x16 += analysis.i_sad_i8x8;
- analysis.i_sad_i4x4 += analysis.i_sad_i8x8;
+ analysis.i_sad_i16x16 += analysis.i_sad_i8x8chroma;
+ analysis.i_sad_i4x4 += analysis.i_sad_i8x8chroma;
}
i_intra_type = I_16x16;
}
/*-------------------- Update MB from the analysis ----------------------*/
- h->mb.type[h->mb.i_mb_xy] = h->mb.i_type;
+ h->mb.type[h->mb.i_mb_xy] = x264_mb_type_fix[h->mb.i_type];
switch( h->mb.i_type )
{
case I_4x4:
}
x264_mb_analyse_intra_chroma( h, &analysis );
- h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
+ break;
+ case I_8x8:
+ h->mb.b_transform_8x8 = 1;
+ for( i = 0; i < 4; i++ )
+ x264_macroblock_cache_intra8x8_pred( h, 2*(i&1), 2*(i>>1),
+ analysis.i_predict8x8[i&1][i>>1] );
+
+ x264_mb_analyse_intra_chroma( h, &analysis );
break;
case I_16x16:
h->mb.i_intra16x16_pred_mode = analysis.i_predict16x16;
-
x264_mb_analyse_intra_chroma( h, &analysis );
- h->mb.i_chroma_pred_mode = analysis.i_predict8x8;
break;
case P_L0:
static inline void x264_cabac_mb_type_intra( x264_t *h, int i_mb_type,
int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
{
- if( i_mb_type == I_4x4 )
+ if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
{
x264_cabac_encode_decision( &h->cabac, ctx0, 0 );
}
static void x264_cabac_mb_type( x264_t *h )
{
- const int i_mb_type = h->mb.i_type;
+ const int i_mb_type = x264_mb_type_fix[h->mb.i_type];
if( h->sh.i_type == SLICE_TYPE_I )
{
x264_cabac_encode_decision( &h->cabac, 69, (i_mode >> 2)&0x01 );
}
}
-static void x264_cabac_mb_intra8x8_pred_mode( x264_t *h )
+
+static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h )
{
const int i_mode = h->mb.i_chroma_pred_mode;
int ctx = 0;
}
}
+static inline void x264_cabac_mb_transform_size( x264_t *h )
+{
+ int ctx = ( h->mb.cache.transform_size[0] == 1 )
+ + ( h->mb.cache.transform_size[1] == 1 );
+ x264_cabac_encode_decision( &h->cabac, 399 + ctx, h->mb.b_transform_8x8 );
+}
+
static inline void x264_cabac_mb_ref( x264_t *h, int i_list, int idx )
{
const int i8 = x264_scan8[idx];
static void block_residual_write_cabac( x264_t *h, int i_ctxBlockCat, int i_idx, int *l, int i_count )
{
- static const int significant_coeff_flag_offset[5] = { 0, 15, 29, 44, 47 };
- static const int last_significant_coeff_flag_offset[5] = { 0, 15, 29, 44, 47 };
- static const int coeff_abs_level_m1_offset[5] = { 0, 10, 20, 30, 39 };
-
- int i_coeff_abs_m1[16];
- int i_coeff_sign[16];
+ static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
+ static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
+ static const int coeff_abs_level_m1_offset[6] = { 0, 10, 20, 30, 39, 199 };
+ static const int significant_coeff_flag_offset_8x8[63] = {
+ 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
+ 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
+ 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
+ 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
+ };
+ static const int last_significant_coeff_flag_offset_8x8[63] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+ };
+
+ int i_coeff_abs_m1[64];
+ int i_coeff_sign[64];
int i_coeff = 0;
int i_last = 0;
* 2-> Luma4x4 i_idx = luma4x4idx
* 3-> DC Chroma i_idx = iCbCr
* 4-> AC Chroma i_idx = 4 * iCbCr + chroma4x4idx
+ * 5-> Luma8x8 i_idx = luma8x8idx
*/
- //fprintf( stderr, "l[] = " );
for( i = 0; i < i_count; i++ )
{
- //fprintf( stderr, "%d ", l[i] );
if( l[i] != 0 )
{
i_coeff_abs_m1[i_coeff] = abs( l[i] ) - 1;
- i_coeff_sign[i_coeff] = ( l[i] < 0 ? 1 : 0);
+ i_coeff_sign[i_coeff] = ( l[i] < 0 );
i_coeff++;
i_last = i;
}
}
- //fprintf( stderr, "\n" );
- if( i_coeff == 0 )
+ if( i_count != 64 )
{
- /* codec block flag */
- x264_cabac_encode_decision( &h->cabac, 85 + x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx ), 0 );
- return;
+ /* coded block flag */
+ x264_cabac_encode_decision( &h->cabac, 85 + x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx ), i_coeff != 0 );
+ if( i_coeff == 0 )
+ return;
}
- /* block coded */
- x264_cabac_encode_decision( &h->cabac, 85 + x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx ), 1 );
for( i = 0; i < i_count - 1; i++ )
{
- int i_ctxIdxInc;
+ int i_sig_ctxIdxInc;
+ int i_last_ctxIdxInc;
- i_ctxIdxInc = X264_MIN( i, i_count - 2 );
+ if( i_ctxBlockCat == 5 )
+ {
+ i_sig_ctxIdxInc = significant_coeff_flag_offset_8x8[i];
+ i_last_ctxIdxInc = last_significant_coeff_flag_offset_8x8[i];
+ }
+ else
+ i_sig_ctxIdxInc = i_last_ctxIdxInc = i;
if( l[i] != 0 )
{
- x264_cabac_encode_decision( &h->cabac, 105 + significant_coeff_flag_offset[i_ctxBlockCat] + i_ctxIdxInc, 1 );
- x264_cabac_encode_decision( &h->cabac, 166 + last_significant_coeff_flag_offset[i_ctxBlockCat] + i_ctxIdxInc, i == i_last ? 1 : 0 );
+ x264_cabac_encode_decision( &h->cabac, 105 + significant_coeff_flag_offset[i_ctxBlockCat] + i_sig_ctxIdxInc, 1 );
+ x264_cabac_encode_decision( &h->cabac, 166 + last_significant_coeff_flag_offset[i_ctxBlockCat] + i_last_ctxIdxInc, i == i_last ? 1 : 0 );
}
else
{
- x264_cabac_encode_decision( &h->cabac, 105 + significant_coeff_flag_offset[i_ctxBlockCat] + i_ctxIdxInc, 0 );
+ x264_cabac_encode_decision( &h->cabac, 105 + significant_coeff_flag_offset[i_ctxBlockCat] + i_sig_ctxIdxInc, 0 );
}
if( i == i_last )
{
x264_cabac_encode_decision( &h->cabac, 227 + i_ctxIdxInc, 1 );
i_ctxIdxInc = 5 + X264_MIN( 4, i_abslevelgt1 ) + coeff_abs_level_m1_offset[i_ctxBlockCat];
for( j = 0; j < i_prefix - 1; j++ )
- {
x264_cabac_encode_decision( &h->cabac, 227 + i_ctxIdxInc, 1 );
- }
if( i_prefix < 14 )
- {
x264_cabac_encode_decision( &h->cabac, 227 + i_ctxIdxInc, 0 );
- }
}
/* suffix */
if( i_coeff_abs_m1[i] >= 14 )
}
x264_cabac_encode_bypass( &h->cabac, 0 );
while( k-- )
- {
x264_cabac_encode_bypass( &h->cabac, (i_suffix >> k)&0x01 );
- }
}
/* write sign */
x264_cabac_encode_bypass( &h->cabac, i_coeff_sign[i] );
-
if( i_coeff_abs_m1[i] == 0 )
- {
i_abslevel1++;
- }
else
- {
i_abslevelgt1++;
- }
}
}
if( IS_INTRA( i_mb_type ) )
{
- /* Prediction */
- if( i_mb_type == I_4x4 )
+ if( h->pps->b_transform_8x8_mode && i_mb_type != I_16x16 )
+ x264_cabac_mb_transform_size( h );
+
+ if( i_mb_type != I_16x16 )
{
- for( i = 0; i < 16; i++ )
+ int di = (i_mb_type == I_8x8) ? 4 : 1;
+ for( i = 0; i < 16; i += di )
{
const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
const int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
x264_cabac_mb_intra4x4_pred_mode( h, i_pred, i_mode );
}
}
- x264_cabac_mb_intra8x8_pred_mode( h );
+
+ x264_cabac_mb_intra_chroma_pred_mode( h );
}
else if( i_mb_type == P_L0 )
{
if( ( i_list ? h->sh.i_num_ref_idx_l1_active : h->sh.i_num_ref_idx_l0_active ) == 1 )
continue;
for( i = 0; i < 4; i++ )
- {
if( x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i] ] )
- {
x264_cabac_mb_ref( h, i_list, 4*i );
- }
- }
}
x264_cabac_mb8x8_mvd( h, 0 );
x264_cabac_mb_cbp_chroma( h );
}
+ if( h->pps->b_transform_8x8_mode && h->mb.i_cbp_luma && !IS_INTRA(i_mb_type)
+ && x264_mb_transform_8x8_allowed( h, i_mb_type ) )
+ {
+ x264_cabac_mb_transform_size( h );
+ }
+
if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 )
{
x264_cabac_mb_qp_delta( h );
/* DC Luma */
block_residual_write_cabac( h, 0, 0, h->dct.luma16x16_dc, 16 );
+ /* AC Luma */
if( h->mb.i_cbp_luma != 0 )
- {
- /* AC Luma */
for( i = 0; i < 16; i++ )
- {
block_residual_write_cabac( h, 1, i, h->dct.block[i].residual_ac, 15 );
- }
- }
+ }
+ else if( h->mb.b_transform_8x8 )
+ {
+ for( i = 0; i < 4; i++ )
+ if( h->mb.i_cbp_luma & ( 1 << i ) )
+ block_residual_write_cabac( h, 5, i, h->dct.luma8x8[i], 64 );
}
else
{
for( i = 0; i < 16; i++ )
- {
if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
- {
block_residual_write_cabac( h, 2, i, h->dct.block[i].luma4x4, 16 );
- }
- }
}
if( h->mb.i_cbp_chroma &0x03 ) /* Chroma DC residual present */
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
{
for( i = 0; i < 8; i++ )
- {
block_residual_write_cabac( h, 4, i, h->dct.block[16+i].residual_ac, 15 );
- }
}
}
}
return;
}
- else if( i_mb_type == I_4x4 )
+ else if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
{
+ int di = i_mb_type == I_8x8 ? 4 : 1;
bs_write_ue( s, i_mb_i_offset + 0 );
+ if( h->pps->b_transform_8x8_mode )
+ bs_write1( s, h->mb.b_transform_8x8 );
/* Prediction: Luma */
- for( i = 0; i < 16; i++ )
+ for( i = 0; i < 16; i += di )
{
int i_pred = x264_mb_predict_intra4x4_mode( h, i );
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
/* Coded block pattern */
- if( i_mb_type == I_4x4 )
+ if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
{
bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
}
bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
}
+ /* transform size 8x8 flag */
+ if( h->pps->b_transform_8x8_mode && h->mb.i_cbp_luma && !IS_INTRA(i_mb_type)
+ && x264_mb_transform_8x8_allowed( h, i_mb_type ) )
+ {
+ bs_write1( s, h->mb.b_transform_8x8 );
+ }
+
/* write residual */
if( i_mb_type == I_16x16 )
{
{
bs_write_se( s, h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp );
+ /* shuffle 8x8 dct coeffs into 4x4 lists */
+ if( h->mb.b_transform_8x8 )
+ {
+ int i4;
+ for( i4 = 0; i4 < 16; i4++ )
+ {
+ for( i = 0; i < 16; i++ )
+ h->dct.block[i4].luma4x4[i] = h->dct.luma8x8[i4>>2][(i4&3)+i*4];
+ h->mb.cache.non_zero_count[x264_scan8[i4]] =
+ array_non_zero_count( h->dct.block[i4].luma4x4, 16 );
+ }
+ }
+
for( i = 0; i < 16; i++ )
{
if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
#endif
//#define DEBUG_MB_TYPE
//#define DEBUG_DUMP_FRAME
//#define DEBUG_BENCHMARK
#ifdef DEBUG_BENCHMARK
h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 5 );
if( !(h->param.analyse.inter & X264_ANALYSE_PSUB16x16) )
h->param.analyse.inter &= ~X264_ANALYSE_PSUB8x8;
+ if( !h->param.analyse.b_transform_8x8 )
+ {
+ h->param.analyse.inter &= ~X264_ANALYSE_I8x8;
+ h->param.analyse.intra &= ~X264_ANALYSE_I8x8;
+ }
h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 2048);
x264_t *x264_encoder_open ( x264_param_t *param )
{
x264_t *h = x264_malloc( sizeof( x264_t ) );
- int i, i_slice;
+ int i;
+
+ memset( h, 0, sizeof( x264_t ) );
/* Create a copy of param */
memcpy( &h->param, param, sizeof( x264_param_t ) );
/* init CPU functions */
x264_predict_16x16_init( h->param.cpu, h->predict_16x16 );
+ x264_predict_8x8c_init( h->param.cpu, h->predict_8x8c );
x264_predict_8x8_init( h->param.cpu, h->predict_8x8 );
x264_predict_4x4_init( h->param.cpu, h->predict_4x4 );
if( x264_ratecontrol_new( h ) < 0 )
return NULL;
- /* stat */
- for( i_slice = 0; i_slice < 5; i_slice++ )
- {
- h->stat.i_slice_count[i_slice] = 0;
- h->stat.i_slice_size[i_slice] = 0;
- h->stat.i_slice_qp[i_slice] = 0;
-
- h->stat.i_sqe_global[i_slice] = 0;
- h->stat.f_psnr_average[i_slice] = 0.0;
- h->stat.f_psnr_mean_y[i_slice] = h->stat.f_psnr_mean_u[i_slice] = h->stat.f_psnr_mean_v[i_slice] = 0.0;
-
- for( i = 0; i < 18; i++ )
- h->stat.i_mb_count[i_slice][i] = 0;
- }
-
x264_log( h, X264_LOG_INFO, "using cpu capabilities %s%s%s%s%s%s\n",
param->cpu&X264_CPU_MMX ? "MMX " : "",
param->cpu&X264_CPU_MMXEXT ? "MMXEXT " : "",
int i_skip;
int mb_xy;
+ /* init stats */
memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
/* Slice */
h->stat.i_slice_size[i_slice_type] += i_frame_size + NALU_OVERHEAD;
h->stat.i_slice_qp[i_slice_type] += i_global_qp;
- for( i = 0; i < 18; i++ )
+ for( i = 0; i < 19; i++ )
{
h->stat.i_mb_count[h->sh.i_type][i] += h->stat.frame.i_mb_count[i];
}
}
x264_log( h, X264_LOG_DEBUG,
- "frame=%4d QP=%i NAL=%d Slice:%c Poc:%-3d I4x4:%-4d I16x16:%-4d P:%-4d SKIP:%-4d size=%d bytes%s\n",
+ "frame=%4d QP=%i NAL=%d Slice:%c Poc:%-3d I4:%-4d I8:%-4d I16:%-4d P:%-4d SKIP:%-4d size=%d bytes%s\n",
h->i_frame - 1,
i_global_qp,
i_nal_ref_idc,
i_slice_type == SLICE_TYPE_I ? 'I' : (i_slice_type == SLICE_TYPE_P ? 'P' : 'B' ),
frame_psnr->i_poc,
h->stat.frame.i_mb_count[I_4x4],
+ h->stat.frame.i_mb_count[I_8x8],
h->stat.frame.i_mb_count[I_16x16],
h->stat.frame.i_mb_count_p,
h->stat.frame.i_mb_count_skip,
#ifdef DEBUG_MB_TYPE
{
- static const char mb_chars[] = { 'i', 'I', 'C', 'P', '8', 'S',
+ static const char mb_chars[] = { 'i', 'i', 'I', 'C', 'P', '8', 'S',
'D', '<', 'X', 'B', 'X', '>', 'B', 'B', 'B', 'B', '8', 'S' };
int mb_xy;
for( mb_xy = 0; mb_xy < h->sps->i_mb_width * h->sps->i_mb_height; mb_xy++ )
{
- if( h->mb.type[mb_xy] < 18 && h->mb.type[mb_xy] >= 0 )
+ if( h->mb.type[mb_xy] < 19 && h->mb.type[mb_xy] >= 0 )
fprintf( stderr, "%c ", mb_chars[ h->mb.type[mb_xy] ] );
else
fprintf( stderr, "? " );
const int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
- "slice I Avg I4x4:%.1f%% I16x16:%.1f%%\n",
+ "slice I Avg I4x4:%.1f%% I8x8:%.1f%% I16x16:%.1f%%\n",
i_mb_count[I_4x4] / i_count,
+ i_mb_count[I_8x8] / i_count,
i_mb_count[I_16x16]/ i_count );
}
if( h->stat.i_slice_count[SLICE_TYPE_P] > 0 )
const int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_P];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
- "slice P Avg I4x4:%.1f%% I16x16:%.1f%% P:%.1f%% P8x8:%.1f%% PSKIP:%.1f%%\n",
+ "slice P Avg I4x4:%.1f%% I8x8:%.1f%% I16x16:%.1f%% P:%.1f%% P8x8:%.1f%% PSKIP:%.1f%%\n",
i_mb_count[I_4x4] / i_count,
+ i_mb_count[I_8x8] / i_count,
i_mb_count[I_16x16]/ i_count,
i_mb_count[P_L0] / i_count,
i_mb_count[P_8x8] / i_count,
const int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_B];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
- "slice B Avg I4x4:%.1f%% I16x16:%.1f%% P:%.1f%% B:%.1f%% B8x8:%.1f%% DIRECT:%.1f%% BSKIP:%.1f%%\n",
+ "slice B Avg I4x4:%.1f%% I8x8:%.1f%% I16x16:%.1f%% P:%.1f%% B:%.1f%% B8x8:%.1f%% DIRECT:%.1f%% BSKIP:%.1f%%\n",
i_mb_count[I_4x4] / i_count,
+ i_mb_count[I_8x8] / i_count,
i_mb_count[I_16x16] / i_count,
(i_mb_count[B_L0_L0] + i_mb_count[B_L1_L1] + i_mb_count[B_L1_L0] + i_mb_count[B_L0_L1]) / i_count,
(i_mb_count[B_BI_BI] + i_mb_count[B_L0_BI] + i_mb_count[B_L1_BI] + i_mb_count[B_BI_L0] + i_mb_count[B_BI_L1]) / i_count,
};
static const uint8_t block_idx_xy[4][4] =
{
- { 0, 2, 8, 10},
- { 1, 3, 9, 11},
- { 4, 6, 12, 14},
- { 5, 7, 13, 15}
+ { 0, 2, 8, 10 },
+ { 1, 3, 9, 11 },
+ { 4, 6, 12, 14 },
+ { 5, 7, 13, 15 }
};
static const int quant_mf[6][4][4] =
{
- { { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243},
- { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243} },
- { { 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660},
- { 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660} },
- { { 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194},
- { 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194} },
- { { 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647},
- { 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647} },
- { { 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355},
- { 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355} },
- { { 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893},
- { 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893} }
+ { { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 },
+ { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 } },
+ { { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 },
+ { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 } },
+ { { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 },
+ { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 } },
+ { { 9362, 5825, 9362, 5825 }, { 5825, 3647, 5825, 3647 },
+ { 9362, 5825, 9362, 5825 }, { 5825, 3647, 5825, 3647 } },
+ { { 8192, 5243, 8192, 5243 }, { 5243, 3355, 5243, 3355 },
+ { 8192, 5243, 8192, 5243 }, { 5243, 3355, 5243, 3355 } },
+ { { 7282, 4559, 7282, 4559 }, { 4559, 2893, 4559, 2893 },
+ { 7282, 4559, 7282, 4559 }, { 4559, 2893, 4559, 2893 } }
+};
+
+/* Forward-quant multiplier table for the 8x8 transform, indexed
+ * [i_qscale%6][y][x] (see quant_8x8).  Each 8x8 matrix is built from
+ * three distinct values laid out in the H.264 8x8 scaling pattern:
+ * rows 0==4, 1==3==5==7, 2==6. */
+const int quant8_mf[6][8][8] =
+{
+    {
+        { 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222 },
+        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
+        { 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481 },
+        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
+        { 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222 },
+        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
+        { 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481 },
+        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 }
+    }, {
+        { 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058 },
+        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
+        { 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290 },
+        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
+        { 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058 },
+        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
+        { 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290 },
+        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 }
+    }, {
+        { 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675 },
+        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 },
+        { 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985 },
+        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 },
+        { 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675 },
+        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 },
+        { 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985 },
+        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 }
+    }, {
+        { 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931 },
+        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 },
+        { 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259 },
+        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 },
+        { 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931 },
+        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 },
+        { 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259 },
+        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 }
+    }, {
+        { 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740 },
+        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 },
+        { 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777 },
+        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 },
+        { 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740 },
+        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 },
+        { 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777 },
+        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 }
+    }, {
+        { 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830 },
+        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 },
+        { 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640 },
+        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 },
+        { 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830 },
+        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 },
+        { 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640 },
+        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 }
+    }
+};
static const int i_chroma_qp_table[52] =
//static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
//static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
+/* ZIG(i,y,x): copy dct[y][x] into level[i]; used to unroll the zigzag scans below. */
+#define ZIG(i,y,x) level[i] = dct[y][x];
+/* Zigzag-scan all 64 coefficients of an 8x8 block into a linear list. */
+static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
+{
+    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
+    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
+    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
+    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
+    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
+    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
+    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
+    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
+    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
+    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
+    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
+    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
+    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
+    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
+    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
+    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
+}
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
+    /* Zigzag-scan all 16 coefficients of a 4x4 block (DC included). */
-    level[0] = dct[0][0];
-    level[1] = dct[0][1];
-    level[2] = dct[1][0];
-    level[3] = dct[2][0];
-    level[4] = dct[1][1];
-    level[5] = dct[0][2];
-    level[6] = dct[0][3];
-    level[7] = dct[1][2];
-    level[8] = dct[2][1];
-    level[9] = dct[3][0];
-    level[10] = dct[3][1];
-    level[11] = dct[2][2];
-    level[12] = dct[1][3];
-    level[13] = dct[2][3];
-    level[14] = dct[3][2];
-    level[15] = dct[3][3];
-#if 0
-    int i;
-    for( i = 0; i < 16; i++ )
-    {
-        level[i] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
-    }
-#endif
+    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
+    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
+    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
+    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
+    /* Zigzag-scan the 15 AC coefficients of a 4x4 block (DC at [0][0] skipped). */
-    level[0] = dct[0][1];
-    level[1] = dct[1][0];
-    level[2] = dct[2][0];
-    level[3] = dct[1][1];
-    level[4] = dct[0][2];
-    level[5] = dct[0][3];
-    level[6] = dct[1][2];
-    level[7] = dct[2][1];
-    level[8] = dct[3][0];
-    level[9] = dct[3][1];
-    level[10] = dct[2][2];
-    level[11] = dct[1][3];
-    level[12] = dct[2][3];
-    level[13] = dct[3][2];
-    level[14] = dct[3][3];
-#if 0
-    int i;
-    for( i = 1; i < 16; i++ )
-    {
-        level[i - 1] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
-    }
-#endif
+    ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
+    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
+    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
+    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}
-
static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
{
+    /* Raster-order scan of the 2x2 chroma DC block. */
-    level[0] = dct[0][0];
-    level[1] = dct[0][1];
-    level[2] = dct[1][0];
-    level[3] = dct[1][1];
+    ZIG(0,0,0)
+    ZIG(1,0,1)
+    ZIG(2,1,0)
+    ZIG(3,1,1)
}
+#undef ZIG
+/* Forward quantization of an 8x8 transform block in place.
+ * i_qscale selects the multiplier set quant8_mf[i_qscale%6] and the
+ * shift 16 + i_qscale/6.  f is the rounding offset: 1/3 of a
+ * quantization step for intra blocks, 1/6 for inter. */
+static void quant_8x8( int16_t dct[8][8], int i_qscale, int b_intra )
+{
+    const int i_qbits = 16 + i_qscale / 6;
+    const int i_mf = i_qscale % 6;
+    const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
+    int x,y;
+    for( y = 0; y < 8; y++ )
+    {
+        for( x = 0; x < 8; x++ )
+        {
+            /* round toward zero: quantize |coeff| then restore the sign */
+            if( dct[y][x] > 0 )
+                dct[y][x] = ( f + dct[y][x] * quant8_mf[i_mf][y][x] ) >> i_qbits;
+            else
+                dct[y][x] = - ( ( f - dct[y][x] * quant8_mf[i_mf][y][x] ) >> i_qbits );
+        }
+    }
+}
static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
{
const int i_qbits = 15 + i_qscale / 6;
for( x = 0; x < 4; x++ )
{
if( dct[y][x] > 0 )
- {
- dct[y][x] =( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
- }
+ dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
else
- {
- dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
- }
+ dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
}
}
}
for( x = 0; x < 4; x++ )
{
if( dct[y][x] > 0 )
- {
dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
- }
else
- {
dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
- }
}
}
}
for( x = 0; x < 2; x++ )
{
if( dct[y][x] > 0 )
- {
dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
- }
else
- {
dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
- }
}
}
}
#endif
-static inline int array_non_zero_count( int *v, int i_count )
-{
- int i;
- int i_nz;
-
- for( i = 0, i_nz = 0; i < i_count; i++ )
- {
- if( v[i] )
- {
- i_nz++;
- }
- }
- return i_nz;
-}
-
/* (ref: JVT-B118)
* x264_mb_decimate_score: given dct coeffs it returns a score to see if we could empty this dct coeffs
* to 0 (low score means set it to null)
*/
static int x264_mb_decimate_score( int *dct, int i_max )
{
- static const int i_ds_table[16] = { 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
+ static const int i_ds_table4[16] = {
+ 3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
+ static const int i_ds_table8[64] = {
+ 3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
+ 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+
+ const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
int i_score = 0;
int idx = i_max - 1;
while( idx >= 0 && dct[idx] == 0 )
- {
idx--;
- }
while( idx >= 0 )
{
int i_run;
if( abs( dct[idx--] ) > 1 )
- {
return 9;
- }
i_run = 0;
while( idx >= 0 && dct[idx] == 0 )
idx--;
i_run++;
}
- i_score += i_ds_table[i_run];
+ i_score += ds_table[i_run];
}
return i_score;
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
{
+    /* Encode one intra 4x4 luma block (idx in 0..15): residual DCT,
+     * quantize, zigzag into h->dct.block[idx], then dequant + IDCT so
+     * fdec holds the reconstructed pixels for neighboring predictions. */
    const int i_stride = h->mb.pic.i_stride[0];
-    uint8_t *p_src = &h->mb.pic.p_fenc[0][4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride];
-    uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride];
-
+    const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
+    uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
+    uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
    int16_t dct4x4[4][4];
    h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
-
    quant_4x4( dct4x4, i_qscale, 1 );
-
    scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
-
    x264_mb_dequant_4x4( dct4x4, i_qscale );
    /* output samples to fdec */
    h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
}
+/* Encode one intra 8x8 luma block (idx in 0..3, raster order within the MB):
+ * residual 8x8 DCT, quantize, zigzag into h->dct.luma8x8[idx], then
+ * dequant + inverse transform so fdec holds the reconstructed pixels. */
+void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
+{
+    const int i_stride = h->mb.pic.i_stride[0];
+    /* 8x8 blocks are laid out 2x2 within the macroblock */
+    const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
+    uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
+    uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
+    int16_t dct8x8[8][8];
+
+    h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
+    quant_8x8( dct8x8, i_qscale, 1 );
+    scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
+    x264_mb_dequant_8x8( dct8x8, i_qscale );
+    h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
+}
+
static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
{
const int i_stride = h->mb.pic.i_stride[0];
h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
}
-static void x264_mb_encode_8x8( x264_t *h, int b_inter, int i_qscale )
+static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
{
int i, ch;
/* fix the pred mode value */
h->mb.i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix[i_mode];
}
+ else if( h->mb.i_type == I_8x8 )
+ {
+ for( i = 0; i < 4; i++ )
+ {
+ const int i_dst = h->mb.pic.i_stride[0];
+ uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
+ int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
+
+ h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
+ x264_mb_encode_i8x8( h, i, i_qscale );
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]] = x264_mb_pred_mode4x4_fix(i_mode);
+ }
+ }
else if( h->mb.i_type == I_4x4 )
{
for( i = 0; i < 16; i++ )
uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
- /* Do the right prediction */
h->predict_4x4[i_mode]( p_dst, i_dst );
-
- /* encode one 4x4 block */
x264_mb_encode_i4x4( h, i, i_qscale );
-
- /* fix the pred mode value */
- h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix[i_mode];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix(i_mode);
}
}
else /* Inter MB */
{
- int16_t dct4x4[16][4][4];
-
int i8x8, i4x4, idx;
int i_decimate_mb = 0;
/* Motion compensation */
x264_mb_mc( h );
- h->dctf.sub16x16_dct( dct4x4,
- h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
- h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
-
- for( i8x8 = 0; i8x8 < 4; i8x8++ )
+ if( h->mb.b_transform_8x8 )
{
- int i_decimate_8x8;
+ int16_t dct8x8[4][8][8];
+ h->dctf.sub16x16_dct8( dct8x8,
+ h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
+ h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
- /* encode one 4x4 block */
- i_decimate_8x8 = 0;
- for( i4x4 = 0; i4x4 < 4; i4x4++ )
+ for( idx = 0; idx < 4; idx++ )
{
- idx = i8x8 * 4 + i4x4;
+ int i_decimate_8x8;
- quant_4x4( dct4x4[idx], i_qscale, 0 );
- scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
- x264_mb_dequant_4x4( dct4x4[idx], i_qscale );
+ quant_8x8( dct8x8[idx], i_qscale, 0 );
+ scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
+ x264_mb_dequant_8x8( dct8x8[idx], i_qscale );
- i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
+ i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
+ i_decimate_mb += i_decimate_8x8;
+ if( i_decimate_8x8 < 4 )
+ {
+ memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
+ memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
+ }
}
- /* decimate this 8x8 block */
- i_decimate_mb += i_decimate_8x8;
- if( i_decimate_8x8 < 4 )
+ if( i_decimate_mb < 6 )
+ memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
+ else
+ h->dctf.add16x16_idct8( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct8x8 );
+ }
+ else
+ {
+ int16_t dct4x4[16][4][4];
+ h->dctf.sub16x16_dct( dct4x4,
+ h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
+ h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
+
+ for( i8x8 = 0; i8x8 < 4; i8x8++ )
{
+ int i_decimate_8x8;
+
+ /* encode one 4x4 block */
+ i_decimate_8x8 = 0;
for( i4x4 = 0; i4x4 < 4; i4x4++ )
{
- int x, y;
idx = i8x8 * 4 + i4x4;
- for( i = 0; i < 16; i++ )
- {
- h->dct.block[idx].luma4x4[i] = 0;
- }
- for( x = 0; x < 4; x++ )
- {
- for( y = 0; y < 4; y++ )
- {
- dct4x4[idx][x][y] = 0;
- }
- }
+
+ quant_4x4( dct4x4[idx], i_qscale, 0 );
+ scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
+ x264_mb_dequant_4x4( dct4x4[idx], i_qscale );
+
+ i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
}
- }
- }
- if( i_decimate_mb < 6 )
- {
- for( idx = 0; idx < 16; idx++ )
- {
- for( i = 0; i < 16; i++ )
+ /* decimate this 8x8 block */
+ i_decimate_mb += i_decimate_8x8;
+ if( i_decimate_8x8 < 4 )
{
- h->dct.block[idx].luma4x4[i] = 0;
+ for( i4x4 = 0; i4x4 < 4; i4x4++ )
+ {
+ int x, y;
+ idx = i8x8 * 4 + i4x4;
+ for( i = 0; i < 16; i++ )
+ h->dct.block[idx].luma4x4[i] = 0;
+ for( x = 0; x < 4; x++ )
+ for( y = 0; y < 4; y++ )
+ dct4x4[idx][x][y] = 0;
+ }
}
}
- }
- else
- {
- h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
+
+ if( i_decimate_mb < 6 )
+ for( idx = 0; idx < 16; idx++ )
+ for( i = 0; i < 16; i++ )
+ h->dct.block[idx].luma4x4[i] = 0;
+ else
+ h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
}
}
{
const int i_mode = h->mb.i_chroma_pred_mode;
/* do the right prediction */
- h->predict_8x8[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
- h->predict_8x8[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
/* fix the pred mode value */
- h->mb.i_chroma_pred_mode = x264_mb_pred_mode8x8_fix[i_mode];
+ h->mb.i_chroma_pred_mode = x264_mb_pred_mode8x8c_fix[i_mode];
}
/* encode the 8x8 blocks */
- x264_mb_encode_8x8( h, !IS_INTRA( h->mb.i_type ), i_qscale );
+ x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qscale );
/* Calculate the Luma/Chroma pattern and non_zero_count */
+ h->mb.i_cbp_luma = 0x00;
if( h->mb.i_type == I_16x16 )
{
- h->mb.i_cbp_luma = 0x00;
for( i = 0; i < 16; i++ )
{
const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
if( nz > 0 )
- {
h->mb.i_cbp_luma = 0x0f;
- }
+ }
+ }
+ else if( h->mb.b_transform_8x8 )
+ {
+ /* coded_block_flag is enough for CABAC,
+ * but CAVLC needs the full non_zero_count. */
+ for( i = 0; i < 4; i++ )
+ {
+ const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
+ int j;
+ for( j = 0; j < 4; j++ )
+ h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
+ if( nz > 0 )
+ h->mb.i_cbp_luma |= 1 << i;
}
}
else
{
- h->mb.i_cbp_luma = 0x00;
for( i = 0; i < 16; i++ )
{
const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
if( nz > 0 )
- {
h->mb.i_cbp_luma |= 1 << (i/4);
- }
}
}
h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = B_SKIP;
h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
}
+
+ if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
+ h->mb.b_transform_8x8 = 0;
}
/*****************************************************************************
void x264_cabac_mb_skip( x264_t *h, int b_skip );
+/* Return 1 if any of the first i_count elements of v is non-zero, else 0. */
+static inline int array_non_zero( int *v, int i_count )
+{
+    int i;
+    for( i = 0; i < i_count; i++ )
+        if( v[i] ) return 1;
+    return 0;
+}
+
+/* Count the non-zero elements among the first i_count entries of v. */
+static inline int array_non_zero_count( int *v, int i_count )
+{
+    int i;
+    int i_nz;
+
+    for( i = 0, i_nz = 0; i < i_count; i++ )
+        if( v[i] )
+            i_nz++;
+
+    return i_nz;
+}
+
+
#endif
void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
{
- sps->i_id = i_id;
+ sps->i_id = i_id;
- if( param->b_cabac || param->i_bframe > 0 )
- sps->i_profile_idc = PROFILE_MAIN;
+ if( param->analyse.b_transform_8x8 )
+ sps->i_profile_idc = PROFILE_HIGH;
+ else if( param->b_cabac || param->i_bframe > 0 )
+ sps->i_profile_idc = PROFILE_MAIN;
else
- sps->i_profile_idc = PROFILE_BASELINE;
+ sps->i_profile_idc = PROFILE_BASELINE;
sps->i_level_idc = param->i_level_idc;
sps->b_constraint_set0 = 0;
bs_write( s, 8, sps->i_level_idc );
bs_write_ue( s, sps->i_id );
+
+ if( sps->i_profile_idc >= PROFILE_HIGH )
+ {
+ bs_write_ue( s, 1 ); // chroma_format_idc = 4:2:0
+ bs_write_ue( s, 0 ); // bit_depth_luma_minus8
+ bs_write_ue( s, 0 ); // bit_depth_chroma_minus8
+ bs_write( s, 1, 0 ); // qpprime_y_zero_transform_bypass_flag
+ bs_write( s, 1, 0 ); // seq_scaling_matrix_present_flag
+ }
+
bs_write_ue( s, sps->i_log2_max_frame_num - 4 );
bs_write_ue( s, sps->i_poc_type );
if( sps->i_poc_type == 0 )
pps->b_deblocking_filter_control = 1;
pps->b_constrained_intra_pred = 0;
pps->b_redundant_pic_cnt = 0;
+
+ pps->b_transform_8x8_mode = param->analyse.b_transform_8x8 ? 1 : 0;
}
void x264_pps_write( bs_t *s, x264_pps_t *pps )
bs_write( s, 1, pps->b_constrained_intra_pred );
bs_write( s, 1, pps->b_redundant_pic_cnt );
+ if( pps->b_transform_8x8_mode )
+ {
+ bs_write( s, 1, pps->b_transform_8x8_mode );
+ bs_write( s, 1, 0 ); // pic_scaling_matrix_present_flag
+ bs_write_se( s, 0 ); // second_chroma_qp_index_offset
+ }
+
bs_rbsp_trailing( s );
}
for( i = I_PRED_CHROMA_DC; i <= I_PRED_CHROMA_P; i++ )
{
int i_cost;
- h->predict_8x8[i]( &pix1[10], 9 );
+ h->predict_8x8c[i]( &pix1[10], 9 );
i_cost = h->pixf.satd[PIXEL_8x8]( &pix1[10], 9, src, i_stride ) + intra_penalty;
i_bcost = X264_MIN( i_bcost, i_cost );
}
#include <stdarg.h>
-#define X264_BUILD 28
+#define X264_BUILD 29
/* x264_t:
* opaque handler for decoder and encoder */
/* Analyse flags
*/
#define X264_ANALYSE_I4x4 0x0001 /* Analyse i4x4 */
+#define X264_ANALYSE_I8x8 0x0002 /* Analyse i8x8 (requires 8x8 transform) */
#define X264_ANALYSE_PSUB16x16 0x0010 /* Analyse p16x8, p8x16 and p8x8 */
#define X264_ANALYSE_PSUB8x8 0x0020 /* Analyse p8x4, p4x8, p4x4 */
#define X264_ANALYSE_BSUB16x16 0x0100 /* Analyse b16x8, b8x16 and b8x8 */
/* Encoder analyser parameters */
struct
{
- unsigned int intra; /* intra flags */
- unsigned int inter; /* inter flags */
+ unsigned int intra; /* intra partitions */
+ unsigned int inter; /* inter partitions */
+
+ int b_transform_8x8;
int i_direct_mv_pred; /* spatial vs temporal mv prediction */
int i_me_method; /* motion estimation algorithm to use (X264_ME_*) */