From: Loren Merritt Date: Sat, 22 Mar 2008 09:06:18 +0000 (-0600) Subject: don't distinguish between luma4x4 and luma4x4ac X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=52fb83347c17f88ea523763223b555ff5f475698;p=libx264 don't distinguish between luma4x4 and luma4x4ac --- diff --git a/common/common.h b/common/common.h index eace850c..4a18cfcf 100644 --- a/common/common.h +++ b/common/common.h @@ -338,13 +338,9 @@ struct x264_t { DECLARE_ALIGNED( int16_t, luma16x16_dc[16], 16 ); DECLARE_ALIGNED( int16_t, chroma_dc[2][4], 16 ); - // FIXME merge with union + // FIXME share memory? DECLARE_ALIGNED( int16_t, luma8x8[4][64], 16 ); - union - { - DECLARE_ALIGNED( int16_t, residual_ac[15], 16 ); - DECLARE_ALIGNED( int16_t, luma4x4[16], 16 ); - } block[16+8]; + DECLARE_ALIGNED( int16_t, luma4x4[16+8][16], 16 ); } dct; /* MB table and cache for current frame/mb */ diff --git a/common/dct.c b/common/dct.c index 8b57055f..7b6e2a75 100644 --- a/common/dct.c +++ b/common/dct.c @@ -521,22 +521,6 @@ static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] ) *(uint64_t*)(level+12) = *(uint64_t*)(*dct+12); } -static void zigzag_scan_4x4ac_frame( int16_t level[15], int16_t dct[4][4] ) -{ - ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0) - ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2) - ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2) - ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3) -} - -static void zigzag_scan_4x4ac_field( int16_t level[15], int16_t dct[4][4] ) -{ - ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0) - ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1) - ZIG( 7,0,2) ZIG( 8,1,2) ZIG( 9,2,2) ZIG(10,3,2) - ZIG(11,0,3) ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,3) -} - #undef ZIG #define ZIG(i,y,x) {\ int oe = x+y*FENC_STRIDE;\ @@ -567,24 +551,6 @@ static void zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8 COPY4x4 } -static void zigzag_sub_4x4ac_frame( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst ) -{ - ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0) - ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2) - ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2) - ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3) - COPY4x4 -} - -static void zigzag_sub_4x4ac_field( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst ) -{ - ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0) - ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1) - ZIG( 7,0,2) ZIG( 8,1,2) ZIG( 9,2,2) ZIG(10,3,2) - ZIG(11,0,3) ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,3) - COPY4x4 -} - #undef ZIG #undef COPY4x4 @@ -594,9 +560,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced ) { pf->scan_8x8 = zigzag_scan_8x8_field; pf->scan_4x4 = zigzag_scan_4x4_field; - pf->scan_4x4ac = zigzag_scan_4x4ac_field; pf->sub_4x4 = zigzag_sub_4x4_field; - pf->sub_4x4ac = zigzag_sub_4x4ac_field; #ifdef HAVE_MMX if( cpu&X264_CPU_MMXEXT ) pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext; @@ -604,20 +568,14 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced ) #ifdef ARCH_PPC if( cpu&X264_CPU_ALTIVEC ) - { pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec; - pf->scan_4x4ac = x264_zigzag_scan_4x4ac_field_altivec; - } #endif } else { pf->scan_8x8 = zigzag_scan_8x8_frame; pf->scan_4x4 = zigzag_scan_4x4_frame; - pf->scan_4x4ac = zigzag_scan_4x4ac_frame; pf->sub_4x4 = zigzag_sub_4x4_frame; - pf->sub_4x4ac = zigzag_sub_4x4ac_frame; - #ifdef HAVE_SSE3 if( cpu&X264_CPU_SSSE3 ) pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3; @@ -625,10 +583,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced ) #ifdef ARCH_PPC if( cpu&X264_CPU_ALTIVEC ) - { pf->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec; - pf->scan_4x4ac = x264_zigzag_scan_4x4ac_frame_altivec; - } #endif } } diff --git a/common/dct.h b/common/dct.h index cf7dbbd1..38aa0788 100644 --- a/common/dct.h +++ b/common/dct.h @@ -110,9 +110,7 @@ typedef struct { void (*scan_8x8)( int16_t level[64], int16_t dct[8][8] ); void (*scan_4x4)( int16_t level[16], int16_t dct[4][4] ); - void (*scan_4x4ac)( int16_t level[15], int16_t dct[4][4] ); void (*sub_4x4)( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst ); - void (*sub_4x4ac)( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst ); } x264_zigzag_function_t; diff --git a/common/ppc/dct.c b/common/ppc/dct.c index 024a157a..5ba2264a 100644 --- a/common/ppc/dct.c +++ b/common/ppc/dct.c @@ -491,38 +491,3 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] ) vec_st( tmp1v, 0x10, level ); } -void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] ) -{ - vec_s16_t dct0v, dct1v; - vec_s16_t tmp0v, tmp1v; - - dct0v = vec_ld(0x00, (int16_t*)dct); - dct1v = vec_ld(0x10, (int16_t*)dct); - - const vec_u8_t sel0 = (vec_u8_t) CV(8,9,2,3,4,5,10,11,16,17,24,25,18,19,12,13); - const vec_u8_t sel1 = (vec_u8_t) CV(6,7,14,15,20,21,26,27,28,29,22,23,30,31,0,1); - - tmp0v = vec_perm( dct0v, dct1v, sel0 ); - tmp1v = vec_perm( dct0v, dct1v, sel1 ); - - vec_st( tmp0v, 0x00, level ); - vec_st( tmp1v, 0x10, level ); -} - -void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] ) -{ - vec_s16_t dct0v, dct1v; - vec_s16_t tmp0v, tmp1v; - - dct0v = vec_ld(0x00, (int16_t*)dct); - dct1v = vec_ld(0x10, (int16_t*)dct); - - const vec_u8_t sel0 = (vec_u8_t) CV(2,3,8,9,4,5,6,7,10,11,12,13,14,15,16,17); - const vec_u8_t sel1 = (vec_u8_t) CV(18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1); - - tmp0v = vec_perm( dct0v, dct1v, sel0 ); - tmp1v = vec_perm( dct0v, dct1v, sel1 ); - - vec_st( tmp0v, 0x00, level ); - vec_st( tmp1v, 0x10, level ); -} diff --git a/common/ppc/dct.h b/common/ppc/dct.h index fa3023b1..4902de57 100644 --- a/common/ppc/dct.h +++ b/common/ppc/dct.h @@ -45,9 +45,6 @@ void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] ); void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] ); void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] ); -void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] ); - void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] ); -void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] ); #endif diff --git a/encoder/analyse.c b/encoder/analyse.c index 1217e651..13152a52 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -741,7 +741,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_ { h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 ); if( h->mb.i_skip_intra == 2 ) - h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.block, sizeof(h->mb.pic.i4x4_dct_buf) ); + h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) ); } } else diff --git a/encoder/cabac.c b/encoder/cabac.c index 08c4f8d7..ed7a3f2d 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -1026,7 +1026,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) /* AC Luma */ if( h->mb.i_cbp_luma != 0 ) for( i = 0; i < 16; i++ ) - block_residual_write_cabac( h, cb, DCT_LUMA_AC, i, h->dct.block[i].residual_ac, 15 ); + block_residual_write_cabac( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 15 ); } else if( h->mb.b_transform_8x8 ) { @@ -1038,7 +1038,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) { for( i = 0; i < 16; i++ ) if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) ) - block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i, h->dct.block[i].luma4x4, 16 ); + block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], 16 ); } if( h->mb.i_cbp_chroma &0x03 ) /* Chroma DC residual present */ @@ -1049,7 +1049,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */ { for( i = 16; i < 24; i++ ) - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[i].residual_ac, 15 ); + block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 ); } } @@ -1119,12 +1119,12 @@ void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel { int i4; for( i4 = 0; i4 < 4; i4++ ) - block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 ); + block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 ); } } - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.block[16+i8].residual_ac, 15 ); - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.block[20+i8].residual_ac, 15 ); + block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 15 ); + block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 15 ); i8 += x264_pixel_size[i_pixel].h >> 3; } @@ -1143,7 +1143,7 @@ static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 ); i_mode = x264_mb_pred_mode4x4_fix( i_mode ); x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode ); - block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.block[i4].luma4x4, 16 ); + block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 ); } static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb ) @@ -1158,7 +1158,7 @@ static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb ) { int i; for( i = 16; i < 24; i++ ) - block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[i].residual_ac, 15 ); + block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 ); } } } diff --git a/encoder/cavlc.c b/encoder/cavlc.c index 22367a21..e04ba5b2 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -304,16 +304,16 @@ static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8s for( i4 = 0; i4 < 4; i4++ ) { for( i = 0; i < 16; i++ ) - h->dct.block[i4+i8*4].luma4x4[i] = h->dct.luma8x8[i8][i4+i*4]; + h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4]; h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = - array_non_zero_count( h->dct.block[i4+i8*4].luma4x4, 16 ); + array_non_zero_count( h->dct.luma4x4[i4+i8*4], 16 ); } } for( i8 = i8start; i8 <= i8end; i8++ ) if( h->mb.i_cbp_luma & (1 << i8) ) for( i4 = 0; i4 < 4; i4++ ) - block_residual_write_cavlc( h, s, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 ); + block_residual_write_cavlc( h, s, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 ); } /***************************************************************************** @@ -666,7 +666,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) /* AC Luma */ if( h->mb.i_cbp_luma != 0 ) for( i = 0; i < 16; i++ ) - block_residual_write_cavlc( h, s, i, h->dct.block[i].residual_ac, 15 ); + block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 ); } else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 ) { @@ -680,7 +680,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s ) block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 ); if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */ for( i = 16; i < 24; i++ ) - block_residual_write_cavlc( h, s, i, h->dct.block[i].residual_ac, 15 ); + block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 ); } #ifndef RDO_SKIP_BS @@ -746,8 +746,8 @@ int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel ) { x264_macroblock_luma_write_cavlc( h, &s, i8, i8 ); - block_residual_write_cavlc( h, &s, 16+i8, h->dct.block[16+i8].residual_ac, 15 ); - block_residual_write_cavlc( h, &s, 20+i8, h->dct.block[20+i8].residual_ac, 15 ); + block_residual_write_cavlc( h, &s, 16+i8, h->dct.luma4x4[16+i8]+1, 15 ); + block_residual_write_cavlc( h, &s, 20+i8, h->dct.luma4x4[20+i8]+1, 15 ); i8 += x264_pixel_size[i_pixel].h >> 3; } @@ -770,10 +770,10 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode ) for( i4 = 0; i4 < 4; i4++ ) { for( i = 0; i < 16; i++ ) - h->dct.block[i4+i8*4].luma4x4[i] = h->dct.luma8x8[i8][i4+i*4]; + h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4]; h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = - array_non_zero_count( h->dct.block[i4+i8*4].luma4x4, 16 ); - block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 ); + array_non_zero_count( h->dct.luma4x4[i4+i8*4], 16 ); + block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 ); } return h->out.bs.i_bits_encoded; } @@ -781,7 +781,7 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode ) static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode ) { h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode ); - block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.block[i4].luma4x4, 16 ); + block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.luma4x4[i4], 16 ); return h->out.bs.i_bits_encoded; } @@ -797,7 +797,7 @@ static int x264_i8x8_chroma_size_cavlc( x264_t *h ) { int i; for( i = 16; i < 24; i++ ) - block_residual_write_cavlc( h, &h->out.bs, i, h->dct.block[i].residual_ac, 15 ); + block_residual_write_cavlc( h, &h->out.bs, i, h->dct.luma4x4[i]+1, 15 ); } } return h->out.bs.i_bits_encoded; diff --git a/encoder/macroblock.c b/encoder/macroblock.c index 5f288635..eb221b7a 100644 --- a/encoder/macroblock.c +++ b/encoder/macroblock.c @@ -89,7 +89,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) if( h->mb.b_lossless ) { - h->zigzagf.sub_4x4( h->dct.block[idx].luma4x4, p_src, p_dst ); + h->zigzagf.sub_4x4( h->dct.luma4x4[idx], p_src, p_dst ); return; } @@ -100,7 +100,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) else h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] ); - h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4 ); + h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 ); h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale ); /* output samples to fdec */ @@ -142,7 +142,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) { int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE; int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE; - h->zigzagf.sub_4x4ac( h->dct.block[i].residual_ac, p_src+oe, p_dst+od ); + h->zigzagf.sub_4x4( h->dct.luma4x4[i], p_src+oe, p_dst+od ); dct4x4[0][block_idx_x[i]][block_idx_y[i]] = p_src[oe] - p_dst[od]; p_dst[od] = p_src[oe]; } @@ -162,7 +162,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) else h->quantf.quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] ); - h->zigzagf.scan_4x4ac( h->dct.block[i].residual_ac, dct4x4[1+i] ); + h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[1+i] ); h->quantf.dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale ); } @@ -204,7 +204,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) { int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE; int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE; - h->zigzagf.sub_4x4ac( h->dct.block[16+i+ch*4].residual_ac, p_src+oe, p_dst+od ); + h->zigzagf.sub_4x4( h->dct.luma4x4[16+i+ch*4], p_src+oe, p_dst+od ); h->dct.chroma_dc[ch][i] = p_src[oe] - p_dst[od]; p_dst[od] = p_src[oe]; } @@ -220,11 +220,11 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) /* no trellis; it doesn't seem to help chroma noticeably */ h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qscale], h->quant4_bias[CQM_4IC+b_inter][i_qscale] ); - h->zigzagf.scan_4x4ac( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] ); + h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] ); if( b_decimate ) { - i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 ); + i_decimate_score += x264_mb_decimate_score( h->dct.luma4x4[16+i+ch*4]+1, 15 ); } } @@ -239,7 +239,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) if( b_decimate && i_decimate_score < 7 ) { /* Near null chroma 8x8 block so make it null (bits saving) */ - memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) ); + memset( &h->dct.luma4x4[16+ch*4], 0, 4 * sizeof( *h->dct.luma4x4 ) ); if( !array_non_zero( dct2x2 ) ) continue; memset( dct4x4, 0, sizeof( dct4x4 ) ); @@ -259,7 +259,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) h->mb.i_cbp_chroma = 0; for( i = 0; i < 8; i++ ) { - int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 ); + int nz = array_non_zero_count( h->dct.luma4x4[16+i]+1, 15 ); h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz; h->mb.i_cbp_chroma |= nz; } @@ -395,7 +395,7 @@ void x264_macroblock_encode( x264_t *h ) h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 ); /* In RD mode, restore the now-overwritten DCT data. */ if( h->mb.i_skip_intra == 2 ) - h->mc.memcpy_aligned( h->dct.block, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) ); + h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) ); } for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ ) { @@ -424,7 +424,7 @@ void x264_macroblock_encode( x264_t *h ) { int x = 4*block_idx_x[i4x4]; int y = 4*block_idx_y[i4x4]; - h->zigzagf.sub_4x4( h->dct.block[i4x4].luma4x4, + h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4], h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE, h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE ); } @@ -497,10 +497,10 @@ void x264_macroblock_encode( x264_t *h ) else h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ); - h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4[idx] ); + h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] ); if( b_decimate ) - i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 ); + i_decimate_8x8 += x264_mb_decimate_score( h->dct.luma4x4[idx], 16 ); } /* decimate this 8x8 block */ @@ -508,13 +508,13 @@ void x264_macroblock_encode( x264_t *h ) if( i_decimate_8x8 < 4 && b_decimate ) { memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) ); - memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) ); + memset( &h->dct.luma4x4[i8x8*4], 0, 4 * sizeof( *h->dct.luma4x4 ) ); nnz8x8[i8x8] = 0; } } if( i_decimate_mb < 6 && b_decimate ) - memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) ); + memset( h->dct.luma4x4, 0, 16 * sizeof( *h->dct.luma4x4 ) ); else { for( i8x8 = 0; i8x8 < 4; i8x8++ ) @@ -545,7 +545,7 @@ void x264_macroblock_encode( x264_t *h ) { for( i = 0; i < 16; i++ ) { - const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 ); + const int nz = array_non_zero_count( h->dct.luma4x4[i]+1, 15 ); h->mb.cache.non_zero_count[x264_scan8[i]] = nz; if( nz > 0 ) h->mb.i_cbp_luma = 0x0f; @@ -569,7 +569,7 @@ void x264_macroblock_encode( x264_t *h ) { for( i = 0; i < 16; i++ ) { - const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 ); + const int nz = array_non_zero_count( h->dct.luma4x4[i], 16 ); h->mb.cache.non_zero_count[x264_scan8[i]] = nz; if( nz > 0 ) h->mb.i_cbp_luma |= 1 << (i/4); @@ -697,9 +697,9 @@ int x264_macroblock_probe_skip( x264_t *h, const int b_bidir ) for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ ) { h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ); - h->zigzagf.scan_4x4ac( dctscan, dct4x4[i4x4] ); + h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] ); - i_decimate_mb += x264_mb_decimate_score( dctscan, 15 ); + i_decimate_mb += x264_mb_decimate_score( dctscan+1, 15 ); if( i_decimate_mb >= 7 ) { return 0; @@ -812,13 +812,13 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) h->quantf.quant_4x4( dct4x4[2], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ); h->quantf.quant_4x4( dct4x4[3], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ); for( i4 = 0; i4 < 4; i4++ ) - h->zigzagf.scan_4x4( h->dct.block[i8*4+i4].luma4x4, dct4x4[i4] ); + h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] ); if( b_decimate ) { int i_decimate_8x8 = 0; for( i4 = 0; i4 < 4 && i_decimate_8x8 < 4; i4++ ) - i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[i8*4+i4].luma4x4, 16 ); + i_decimate_8x8 += x264_mb_decimate_score( h->dct.luma4x4[i8*4+i4], 16 ); nnz8x8 = 4 <= i_decimate_8x8; } else @@ -842,7 +842,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec ); h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ); - h->zigzagf.scan_4x4ac( h->dct.block[16+i8+ch*4].residual_ac, dct4x4 ); + h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 ); if( array_non_zero( dct4x4 ) ) { h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp ); diff --git a/tools/checkasm.c b/tools/checkasm.c index 74b2bf91..9e71e612 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -385,9 +385,7 @@ static int check_dct( int cpu_ref, int cpu_new ) ok = 1; used_asm = 0; TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 ); TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 ); - TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 ); TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 ); - TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 ); report( "zigzag_frame :" ); x264_zigzag_init( 0, &zigzag_c, 1 ); @@ -397,9 +395,7 @@ static int check_dct( int cpu_ref, int cpu_new ) ok = 1; used_asm = 0; TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 ); TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 ); - TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 ); TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 ); - TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 ); report( "zigzag_field :" ); #undef TEST_ZIGZAG_SCAN #undef TEST_ZIGZAG_SUB