From: Loren Merritt <pengvado@akuvian.org>
Date: Sat, 22 Mar 2008 09:06:18 +0000 (-0600)
Subject: don't distinguish between luma4x4 and luma4x4ac
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=52fb83347c17f88ea523763223b555ff5f475698;p=libx264

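don't distinguish between luma4x4 and luma4x4ac

AC-only residual blocks (i16x16 luma AC and chroma AC) are now stored in
the same 16-coefficient luma4x4 layout as full 4x4 blocks, and their 15 AC
coefficients are accessed at offset +1. This replaces the
residual_ac/luma4x4 union with a single luma4x4[16+8][16] array and removes
the separate scan_4x4ac/sub_4x4ac zigzag functions (C and AltiVec).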
---

diff --git a/common/common.h b/common/common.h
index eace850c..4a18cfcf 100644
--- a/common/common.h
+++ b/common/common.h
@@ -338,13 +338,9 @@ struct x264_t
     {
         DECLARE_ALIGNED( int16_t, luma16x16_dc[16], 16 );
         DECLARE_ALIGNED( int16_t, chroma_dc[2][4], 16 );
-        // FIXME merge with union
+        // FIXME share memory?
         DECLARE_ALIGNED( int16_t, luma8x8[4][64], 16 );
-        union
-        {
-            DECLARE_ALIGNED( int16_t, residual_ac[15], 16 );
-            DECLARE_ALIGNED( int16_t, luma4x4[16], 16 );
-        } block[16+8];
+        DECLARE_ALIGNED( int16_t, luma4x4[16+8][16], 16 );
     } dct;
 
     /* MB table and cache for current frame/mb */
diff --git a/common/dct.c b/common/dct.c
index 8b57055f..7b6e2a75 100644
--- a/common/dct.c
+++ b/common/dct.c
@@ -521,22 +521,6 @@ static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
     *(uint64_t*)(level+12) = *(uint64_t*)(*dct+12);
 }
 
-static void zigzag_scan_4x4ac_frame( int16_t level[15], int16_t dct[4][4] )
-{
-                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
-    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
-    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
-    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
-}
-
-static void zigzag_scan_4x4ac_field( int16_t level[15], int16_t dct[4][4] )
-{
-                ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0)
-    ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1)
-    ZIG( 7,0,2) ZIG( 8,1,2) ZIG( 9,2,2) ZIG(10,3,2)
-    ZIG(11,0,3) ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,3)
-}
-
 #undef ZIG
 #define ZIG(i,y,x) {\
     int oe = x+y*FENC_STRIDE;\
@@ -567,24 +551,6 @@ static void zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8
     COPY4x4
 }
 
-static void zigzag_sub_4x4ac_frame( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst )
-{
-                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
-    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
-    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
-    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
-    COPY4x4
-}
-
-static void zigzag_sub_4x4ac_field( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst )
-{
-                ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0)
-    ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1)
-    ZIG( 7,0,2) ZIG( 8,1,2) ZIG( 9,2,2) ZIG(10,3,2)
-    ZIG(11,0,3) ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,3)
-    COPY4x4
-}
-
 #undef ZIG
 #undef COPY4x4
 
@@ -594,9 +560,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
     {
         pf->scan_8x8   = zigzag_scan_8x8_field;
         pf->scan_4x4   = zigzag_scan_4x4_field;
-        pf->scan_4x4ac = zigzag_scan_4x4ac_field;
         pf->sub_4x4    = zigzag_sub_4x4_field;
-        pf->sub_4x4ac  = zigzag_sub_4x4ac_field;
 #ifdef HAVE_MMX
         if( cpu&X264_CPU_MMXEXT )
             pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext;
@@ -604,20 +568,14 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
 
 #ifdef ARCH_PPC
         if( cpu&X264_CPU_ALTIVEC )
-        {
             pf->scan_4x4   = x264_zigzag_scan_4x4_field_altivec;
-            pf->scan_4x4ac = x264_zigzag_scan_4x4ac_field_altivec;
-        }
 #endif
     }
     else
     {
         pf->scan_8x8   = zigzag_scan_8x8_frame;
         pf->scan_4x4   = zigzag_scan_4x4_frame;
-        pf->scan_4x4ac = zigzag_scan_4x4ac_frame;
         pf->sub_4x4    = zigzag_sub_4x4_frame;
-        pf->sub_4x4ac  = zigzag_sub_4x4ac_frame;
-
 #ifdef HAVE_SSE3
         if( cpu&X264_CPU_SSSE3 )
             pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
@@ -625,10 +583,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
 
 #ifdef ARCH_PPC
         if( cpu&X264_CPU_ALTIVEC )
-        {
             pf->scan_4x4   = x264_zigzag_scan_4x4_frame_altivec;
-            pf->scan_4x4ac = x264_zigzag_scan_4x4ac_frame_altivec;
-        }
 #endif
     }
 }
diff --git a/common/dct.h b/common/dct.h
index cf7dbbd1..38aa0788 100644
--- a/common/dct.h
+++ b/common/dct.h
@@ -110,9 +110,7 @@ typedef struct
 {
     void (*scan_8x8)( int16_t level[64], int16_t dct[8][8] );
     void (*scan_4x4)( int16_t level[16], int16_t dct[4][4] );
-    void (*scan_4x4ac)( int16_t level[15], int16_t dct[4][4] );
     void (*sub_4x4)( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst );
-    void (*sub_4x4ac)( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst );
 
 } x264_zigzag_function_t;
 
diff --git a/common/ppc/dct.c b/common/ppc/dct.c
index 024a157a..5ba2264a 100644
--- a/common/ppc/dct.c
+++ b/common/ppc/dct.c
@@ -491,38 +491,3 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
     vec_st( tmp1v, 0x10, level );
 }
 
-void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] )
-{
-    vec_s16_t dct0v, dct1v;
-    vec_s16_t tmp0v, tmp1v;
-
-    dct0v = vec_ld(0x00, (int16_t*)dct);
-    dct1v = vec_ld(0x10, (int16_t*)dct);
-
-    const vec_u8_t sel0 = (vec_u8_t) CV(8,9,2,3,4,5,10,11,16,17,24,25,18,19,12,13);
-    const vec_u8_t sel1 = (vec_u8_t) CV(6,7,14,15,20,21,26,27,28,29,22,23,30,31,0,1);
-
-    tmp0v = vec_perm( dct0v, dct1v, sel0 );
-    tmp1v = vec_perm( dct0v, dct1v, sel1 );
-
-    vec_st( tmp0v, 0x00, level );
-    vec_st( tmp1v, 0x10, level );
-}
-
-void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] )
-{
-    vec_s16_t dct0v, dct1v;
-    vec_s16_t tmp0v, tmp1v;
-
-    dct0v = vec_ld(0x00, (int16_t*)dct);
-    dct1v = vec_ld(0x10, (int16_t*)dct);
-
-    const vec_u8_t sel0 = (vec_u8_t) CV(2,3,8,9,4,5,6,7,10,11,12,13,14,15,16,17);
-    const vec_u8_t sel1 = (vec_u8_t) CV(18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1);
-
-    tmp0v = vec_perm( dct0v, dct1v, sel0 );
-    tmp1v = vec_perm( dct0v, dct1v, sel1 );
-
-    vec_st( tmp0v, 0x00, level );
-    vec_st( tmp1v, 0x10, level );
-}
diff --git a/common/ppc/dct.h b/common/ppc/dct.h
index fa3023b1..4902de57 100644
--- a/common/ppc/dct.h
+++ b/common/ppc/dct.h
@@ -45,9 +45,6 @@ void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] );
 void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] );
 
 void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] );
-
 void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] );
 
 #endif
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 1217e651..13152a52 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -741,7 +741,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
             {
                 h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
                 if( h->mb.i_skip_intra == 2 )
-                    h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.block, sizeof(h->mb.pic.i4x4_dct_buf) );
+                    h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) );
             }
         }
         else
diff --git a/encoder/cabac.c b/encoder/cabac.c
index 08c4f8d7..ed7a3f2d 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -1026,7 +1026,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
             /* AC Luma */
             if( h->mb.i_cbp_luma != 0 )
                 for( i = 0; i < 16; i++ )
-                    block_residual_write_cabac( h, cb, DCT_LUMA_AC, i, h->dct.block[i].residual_ac, 15 );
+                    block_residual_write_cabac( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 15 );
         }
         else if( h->mb.b_transform_8x8 )
         {
@@ -1038,7 +1038,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
         {
             for( i = 0; i < 16; i++ )
                 if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
-                    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i, h->dct.block[i].luma4x4, 16 );
+                    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], 16 );
         }
 
         if( h->mb.i_cbp_chroma &0x03 )    /* Chroma DC residual present */
@@ -1049,7 +1049,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
         if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
         {
             for( i = 16; i < 24; i++ )
-                block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[i].residual_ac, 15 );
+                block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
         }
     }
 
@@ -1119,12 +1119,12 @@ void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel
             {
                 int i4;
                 for( i4 = 0; i4 < 4; i4++ )
-                    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 );
+                    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
             }
         }
 
-        block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.block[16+i8].residual_ac, 15 );
-        block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.block[20+i8].residual_ac, 15 );
+        block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
+        block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
 
         i8 += x264_pixel_size[i_pixel].h >> 3;
     }
@@ -1143,7 +1143,7 @@ static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4,
     const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
     i_mode = x264_mb_pred_mode4x4_fix( i_mode );
     x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
-    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.block[i4].luma4x4, 16 );
+    block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
 }
 
 static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
@@ -1158,7 +1158,7 @@ static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
         {
             int i;
             for( i = 16; i < 24; i++ )
-                block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[i].residual_ac, 15 );
+                block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
         }
     }
 }
diff --git a/encoder/cavlc.c b/encoder/cavlc.c
index 22367a21..e04ba5b2 100644
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -304,16 +304,16 @@ static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8s
                 for( i4 = 0; i4 < 4; i4++ )
                 {
                     for( i = 0; i < 16; i++ )
-                        h->dct.block[i4+i8*4].luma4x4[i] = h->dct.luma8x8[i8][i4+i*4];
+                        h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4];
                     h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] =
-                        array_non_zero_count( h->dct.block[i4+i8*4].luma4x4, 16 );
+                        array_non_zero_count( h->dct.luma4x4[i4+i8*4], 16 );
                 }
     }
 
     for( i8 = i8start; i8 <= i8end; i8++ )
         if( h->mb.i_cbp_luma & (1 << i8) )
             for( i4 = 0; i4 < 4; i4++ )
-                block_residual_write_cavlc( h, s, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 );
+                block_residual_write_cavlc( h, s, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
 }
 
 /*****************************************************************************
@@ -666,7 +666,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         /* AC Luma */
         if( h->mb.i_cbp_luma != 0 )
             for( i = 0; i < 16; i++ )
-                block_residual_write_cavlc( h, s, i, h->dct.block[i].residual_ac, 15 );
+                block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 );
     }
     else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 )
     {
@@ -680,7 +680,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 );
         if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
             for( i = 16; i < 24; i++ )
-                block_residual_write_cavlc( h, s, i, h->dct.block[i].residual_ac, 15 );
+                block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 );
     }
 
 #ifndef RDO_SKIP_BS
@@ -746,8 +746,8 @@ int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
     {
         x264_macroblock_luma_write_cavlc( h, &s, i8, i8 );
 
-        block_residual_write_cavlc( h, &s, 16+i8, h->dct.block[16+i8].residual_ac, 15 );
-        block_residual_write_cavlc( h, &s, 20+i8, h->dct.block[20+i8].residual_ac, 15 );
+        block_residual_write_cavlc( h, &s, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
+        block_residual_write_cavlc( h, &s, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
 
         i8 += x264_pixel_size[i_pixel].h >> 3;
     }
@@ -770,10 +770,10 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
     for( i4 = 0; i4 < 4; i4++ )
     {
         for( i = 0; i < 16; i++ )
-            h->dct.block[i4+i8*4].luma4x4[i] = h->dct.luma8x8[i8][i4+i*4];
+            h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4];
         h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] =
-            array_non_zero_count( h->dct.block[i4+i8*4].luma4x4, 16 );
-        block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 );
+            array_non_zero_count( h->dct.luma4x4[i4+i8*4], 16 );
+        block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
     }
     return h->out.bs.i_bits_encoded;
 }
@@ -781,7 +781,7 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
 static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
 {
     h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
-    block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.block[i4].luma4x4, 16 );
+    block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.luma4x4[i4], 16 );
     return h->out.bs.i_bits_encoded;
 }
 
@@ -797,7 +797,7 @@ static int x264_i8x8_chroma_size_cavlc( x264_t *h )
         {
             int i;
             for( i = 16; i < 24; i++ )
-                block_residual_write_cavlc( h, &h->out.bs, i, h->dct.block[i].residual_ac, 15 );
+                block_residual_write_cavlc( h, &h->out.bs, i, h->dct.luma4x4[i]+1, 15 );
         }
     }
     return h->out.bs.i_bits_encoded;
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 5f288635..eb221b7a 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -89,7 +89,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
 
     if( h->mb.b_lossless )
     {
-        h->zigzagf.sub_4x4( h->dct.block[idx].luma4x4, p_src, p_dst );
+        h->zigzagf.sub_4x4( h->dct.luma4x4[idx], p_src, p_dst );
         return;
     }
 
@@ -100,7 +100,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
     else
         h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
 
-    h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4 );
+    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
     h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
 
     /* output samples to fdec */
@@ -142,7 +142,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
         {
             int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
             int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
-            h->zigzagf.sub_4x4ac( h->dct.block[i].residual_ac, p_src+oe, p_dst+od );
+            h->zigzagf.sub_4x4( h->dct.luma4x4[i], p_src+oe, p_dst+od );
             dct4x4[0][block_idx_x[i]][block_idx_y[i]] = p_src[oe] - p_dst[od];
             p_dst[od] = p_src[oe];
         }
@@ -162,7 +162,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
         else
             h->quantf.quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
 
-        h->zigzagf.scan_4x4ac( h->dct.block[i].residual_ac, dct4x4[1+i] );
+        h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[1+i] );
         h->quantf.dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
     }
 
@@ -204,7 +204,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
             {
                 int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
                 int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
-                h->zigzagf.sub_4x4ac( h->dct.block[16+i+ch*4].residual_ac, p_src+oe, p_dst+od );
+                h->zigzagf.sub_4x4( h->dct.luma4x4[16+i+ch*4], p_src+oe, p_dst+od );
                 h->dct.chroma_dc[ch][i] = p_src[oe] - p_dst[od];
                 p_dst[od] = p_src[oe];
             }
@@ -220,11 +220,11 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
 
             /* no trellis; it doesn't seem to help chroma noticeably */
             h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qscale], h->quant4_bias[CQM_4IC+b_inter][i_qscale] );
-            h->zigzagf.scan_4x4ac( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
+            h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
 
             if( b_decimate )
             {
-                i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
+                i_decimate_score += x264_mb_decimate_score( h->dct.luma4x4[16+i+ch*4]+1, 15 );
             }
         }
 
@@ -239,7 +239,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
         if( b_decimate && i_decimate_score < 7 )
         {
             /* Near null chroma 8x8 block so make it null (bits saving) */
-            memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) );
+            memset( &h->dct.luma4x4[16+ch*4], 0, 4 * sizeof( *h->dct.luma4x4 ) );
             if( !array_non_zero( dct2x2 ) )
                 continue;
             memset( dct4x4, 0, sizeof( dct4x4 ) );
@@ -259,7 +259,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
     h->mb.i_cbp_chroma = 0;
     for( i = 0; i < 8; i++ )
     {
-        int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
+        int nz = array_non_zero_count( h->dct.luma4x4[16+i]+1, 15 );
         h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
         h->mb.i_cbp_chroma |= nz;
     }
@@ -395,7 +395,7 @@ void x264_macroblock_encode( x264_t *h )
             h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
             /* In RD mode, restore the now-overwritten DCT data. */
             if( h->mb.i_skip_intra == 2 )
-                h->mc.memcpy_aligned( h->dct.block, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
+                h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
         }
         for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
         {
@@ -424,7 +424,7 @@ void x264_macroblock_encode( x264_t *h )
             {
                 int x = 4*block_idx_x[i4x4];
                 int y = 4*block_idx_y[i4x4];
-                h->zigzagf.sub_4x4( h->dct.block[i4x4].luma4x4,
+                h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
                                     h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
                                     h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
             }
@@ -497,10 +497,10 @@ void x264_macroblock_encode( x264_t *h )
                     else
                         h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
 
-                    h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4[idx] );
+                    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
                     
                     if( b_decimate )
-                        i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
+                        i_decimate_8x8 += x264_mb_decimate_score( h->dct.luma4x4[idx], 16 );
                 }
 
                 /* decimate this 8x8 block */
@@ -508,13 +508,13 @@ void x264_macroblock_encode( x264_t *h )
                 if( i_decimate_8x8 < 4 && b_decimate )
                 {
                     memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) );
-                    memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) );
+                    memset( &h->dct.luma4x4[i8x8*4], 0, 4 * sizeof( *h->dct.luma4x4 ) );
                     nnz8x8[i8x8] = 0;
                 }
             }
 
             if( i_decimate_mb < 6 && b_decimate )
-                memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) );
+                memset( h->dct.luma4x4, 0, 16 * sizeof( *h->dct.luma4x4 ) );
             else
             {
                 for( i8x8 = 0; i8x8 < 4; i8x8++ )
@@ -545,7 +545,7 @@ void x264_macroblock_encode( x264_t *h )
     {
         for( i = 0; i < 16; i++ )
         {
-            const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
+            const int nz = array_non_zero_count( h->dct.luma4x4[i]+1, 15 );
             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
             if( nz > 0 )
                 h->mb.i_cbp_luma = 0x0f;
@@ -569,7 +569,7 @@ void x264_macroblock_encode( x264_t *h )
     {
         for( i = 0; i < 16; i++ )
         {
-            const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
+            const int nz = array_non_zero_count( h->dct.luma4x4[i], 16 );
             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
             if( nz > 0 )
                 h->mb.i_cbp_luma |= 1 << (i/4);
@@ -697,9 +697,9 @@ int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
         for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
         {
             h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
-            h->zigzagf.scan_4x4ac( dctscan, dct4x4[i4x4] );
+            h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
 
-            i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
+            i_decimate_mb += x264_mb_decimate_score( dctscan+1, 15 );
             if( i_decimate_mb >= 7 )
             {
                 return 0;
@@ -812,13 +812,13 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
         h->quantf.quant_4x4( dct4x4[2], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
         h->quantf.quant_4x4( dct4x4[3], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
         for( i4 = 0; i4 < 4; i4++ )
-            h->zigzagf.scan_4x4( h->dct.block[i8*4+i4].luma4x4, dct4x4[i4] );
+            h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] );
 
         if( b_decimate )
         {
             int i_decimate_8x8 = 0;
             for( i4 = 0; i4 < 4 && i_decimate_8x8 < 4; i4++ )
-                i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[i8*4+i4].luma4x4, 16 );
+                i_decimate_8x8 += x264_mb_decimate_score( h->dct.luma4x4[i8*4+i4], 16 );
             nnz8x8 = 4 <= i_decimate_8x8;
         }
         else
@@ -842,7 +842,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
 
         h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
         h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
-        h->zigzagf.scan_4x4ac( h->dct.block[16+i8+ch*4].residual_ac, dct4x4 );
+        h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
         if( array_non_zero( dct4x4 ) )
         {
             h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
diff --git a/tools/checkasm.c b/tools/checkasm.c
index 74b2bf91..9e71e612 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -385,9 +385,7 @@ static int check_dct( int cpu_ref, int cpu_new )
     ok = 1; used_asm = 0;
     TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
     TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16  );
-    TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 );
     TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
-    TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 );
     report( "zigzag_frame :" );
 
     x264_zigzag_init( 0, &zigzag_c, 1 );
@@ -397,9 +395,7 @@ static int check_dct( int cpu_ref, int cpu_new )
     ok = 1; used_asm = 0;
     TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
     TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16  );
-    TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 );
     TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
-    TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 );
     report( "zigzag_field :" );
 #undef TEST_ZIGZAG_SCAN
 #undef TEST_ZIGZAG_SUB