From: Loren Merritt <pengvado@akuvian.org>
Date: Thu, 20 Mar 2008 05:43:19 +0000 (-0600)
Subject: reduce zigzag arrays from int to int16_t
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=30da25a99e24e5c1ff5972b7f5c22c4be2a944b1;p=libx264

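Reduce the zigzag-scanned coefficient arrays from int to int16_t.

The C 8x8/4x4 scan macros now index the dct block as a flat array, and
the mmx and sse2 versions of zigzag_scan_4x4_field are replaced by a
single mmxext version. checkasm is updated for the new element size.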
---

diff --git a/common/common.h b/common/common.h
index 5ee3ef8a..c21ad1c8 100644
--- a/common/common.h
+++ b/common/common.h
@@ -337,14 +337,14 @@ struct x264_t
     /* Current MB DCT coeffs */
     struct
     {
-        DECLARE_ALIGNED( int, luma16x16_dc[16], 16 );
-        DECLARE_ALIGNED( int, chroma_dc[2][4], 16 );
+        DECLARE_ALIGNED( int16_t, luma16x16_dc[16], 16 );
+        DECLARE_ALIGNED( int16_t, chroma_dc[2][4], 16 );
         // FIXME merge with union
-        DECLARE_ALIGNED( int, luma8x8[4][64], 16 );
+        DECLARE_ALIGNED( int16_t, luma8x8[4][64], 16 );
         union
         {
-            DECLARE_ALIGNED( int, residual_ac[15], 16 );
-            DECLARE_ALIGNED( int, luma4x4[16], 16 );
+            DECLARE_ALIGNED( int16_t, residual_ac[15], 16 );
+            DECLARE_ALIGNED( int16_t, luma4x4[16], 16 );
         } block[16+8];
     } dct;
 
@@ -441,8 +441,8 @@ struct x264_t
             /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */            
             DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 );
             DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 );
-            DECLARE_ALIGNED( int, i8x8_dct_buf[3][64], 16 );
-            DECLARE_ALIGNED( int, i4x4_dct_buf[15][16], 16 );
+            DECLARE_ALIGNED( int16_t, i8x8_dct_buf[3][64], 16 );
+            DECLARE_ALIGNED( int16_t, i4x4_dct_buf[15][16], 16 );
 
             /* pointer over mb of the frame to be compressed */
             uint8_t *p_fenc[3];
diff --git a/common/dct.c b/common/dct.c
index 74e5a917..895306b5 100644
--- a/common/dct.c
+++ b/common/dct.c
@@ -458,9 +458,10 @@ void x264_dct_init_weights( void )
 }
 
 
-#define ZIG(i,y,x) level[i] = dct[x][y];
+// index dct as a flat array via dct[0]: gcc pessimizes multi-dimensional arrays here, even with constant indices
+#define ZIG(i,y,x) level[i] = dct[0][x*8+y];
 
-static void zigzag_scan_8x8_frame( int level[64], int16_t dct[8][8] )
+static void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[8][8] )
 {
     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
@@ -480,7 +481,7 @@ static void zigzag_scan_8x8_frame( int level[64], int16_t dct[8][8] )
     ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
 }
 
-static void zigzag_scan_8x8_field( int level[64], int16_t dct[8][8] )
+static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[8][8] )
 {
     ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,2,0) ZIG( 3,0,1)
     ZIG( 4,1,1) ZIG( 5,3,0) ZIG( 6,4,0) ZIG( 7,2,1)
@@ -500,7 +501,10 @@ static void zigzag_scan_8x8_field( int level[64], int16_t dct[8][8] )
     ZIG(60,4,7) ZIG(61,5,7) ZIG(62,6,7) ZIG(63,7,7)
 }
 
-static void zigzag_scan_4x4_frame( int level[16], int16_t dct[4][4] )
+#undef ZIG
+#define ZIG(i,y,x) level[i] = dct[0][x*4+y];
+
+static void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[4][4] )
 {
     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
@@ -508,15 +512,16 @@ static void zigzag_scan_4x4_frame( int level[16], int16_t dct[4][4] )
     ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
 }
 
-static void zigzag_scan_4x4_field( int level[16], int16_t dct[4][4] )
+static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
 {
-    ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)
-    ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)
-    ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)
-    ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
+    *(uint32_t*)level = *(uint32_t*)dct;
+    ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
+    *(uint32_t*)(level+6) = *(uint32_t*)(*dct+6);
+    *(uint64_t*)(level+8) = *(uint64_t*)(*dct+8);
+    *(uint64_t*)(level+12) = *(uint64_t*)(*dct+12);
 }
 
-static void zigzag_scan_4x4ac_frame( int level[15], int16_t dct[4][4] )
+static void zigzag_scan_4x4ac_frame( int16_t level[15], int16_t dct[4][4] )
 {
                 ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
     ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
@@ -524,7 +529,7 @@ static void zigzag_scan_4x4ac_frame( int level[15], int16_t dct[4][4] )
     ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
 }
 
-static void zigzag_scan_4x4ac_field( int level[15], int16_t dct[4][4] )
+static void zigzag_scan_4x4ac_field( int16_t level[15], int16_t dct[4][4] )
 {
                 ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0)
     ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1)
@@ -533,7 +538,6 @@ static void zigzag_scan_4x4ac_field( int level[15], int16_t dct[4][4] )
 }
 
 #undef ZIG
-
 #define ZIG(i,y,x) {\
     int oe = x+y*FENC_STRIDE;\
     int od = x+y*FDEC_STRIDE;\
@@ -541,7 +545,7 @@ static void zigzag_scan_4x4ac_field( int level[15], int16_t dct[4][4] )
     p_dst[od] = p_src[oe];\
 }
 
-static void zigzag_sub_4x4_frame( int level[16], const uint8_t *p_src, uint8_t *p_dst )
+static void zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
 {
     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
@@ -549,7 +553,7 @@ static void zigzag_sub_4x4_frame( int level[16], const uint8_t *p_src, uint8_t *
     ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
 }
 
-static void zigzag_sub_4x4_field( int level[16], const uint8_t *p_src, uint8_t *p_dst )
+static void zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
 {
     ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)
     ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)
@@ -557,7 +561,7 @@ static void zigzag_sub_4x4_field( int level[16], const uint8_t *p_src, uint8_t *
     ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
 }
 
-static void zigzag_sub_4x4ac_frame( int level[15], const uint8_t *p_src, uint8_t *p_dst )
+static void zigzag_sub_4x4ac_frame( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst )
 {
                 ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
     ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
@@ -565,7 +569,7 @@ static void zigzag_sub_4x4ac_frame( int level[15], const uint8_t *p_src, uint8_t
     ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
 }
 
-static void zigzag_sub_4x4ac_field( int level[15], const uint8_t *p_src, uint8_t *p_dst )
+static void zigzag_sub_4x4ac_field( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst )
 {
                 ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0)
     ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1)
@@ -585,12 +589,8 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
         pf->sub_4x4    = zigzag_sub_4x4_field;
         pf->sub_4x4ac  = zigzag_sub_4x4ac_field;
 #ifdef HAVE_MMX
-#ifdef ARCH_X86
-        if( cpu&X264_CPU_MMX )
-            pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmx;
-#endif
-        if( cpu&X264_CPU_SSE2 )
-            pf->scan_4x4 = x264_zigzag_scan_4x4_field_sse2;
+        if( cpu&X264_CPU_MMXEXT )
+            pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext;
 #endif
 
 #ifdef ARCH_PPC
diff --git a/common/dct.h b/common/dct.h
index 4c71e478..cf7dbbd1 100644
--- a/common/dct.h
+++ b/common/dct.h
@@ -108,11 +108,11 @@ typedef struct
 
 typedef struct
 {
-    void (*scan_8x8)( int level[64], int16_t dct[8][8] );
-    void (*scan_4x4)( int level[16], int16_t dct[4][4] );
-    void (*scan_4x4ac)( int level[15], int16_t dct[4][4] );
-    void (*sub_4x4)( int level[16], const uint8_t *p_src, uint8_t *p_dst );
-    void (*sub_4x4ac)( int level[15], const uint8_t *p_src, uint8_t *p_dst );
+    void (*scan_8x8)( int16_t level[64], int16_t dct[8][8] );
+    void (*scan_4x4)( int16_t level[16], int16_t dct[4][4] );
+    void (*scan_4x4ac)( int16_t level[15], int16_t dct[4][4] );
+    void (*sub_4x4)( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst );
+    void (*sub_4x4ac)( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst );
 
 } x264_zigzag_function_t;
 
diff --git a/common/x86/dct-32.asm b/common/x86/dct-32.asm
index b7de1780..260bcf03 100644
--- a/common/x86/dct-32.asm
+++ b/common/x86/dct-32.asm
@@ -526,35 +526,3 @@ ADD_NxN_IDCT x264_add16x16_idct8_mmx, x264_add8x8_idct8_mmx, 128, 8
 
 ADD_NxN_IDCT x264_add16x16_idct8_sse2, x264_add8x8_idct8_sse2, 128, 8
 
-;-----------------------------------------------------------------------------
-; void x264_zigzag_scan_4x4_field_mmx( int level[16], int16_t dct[4][4] )
-;-----------------------------------------------------------------------------
-cglobal x264_zigzag_scan_4x4_field_mmx
-    mov       edx, [esp+8]
-    mov       ecx, [esp+4]
-    punpcklwd mm0, [edx]
-    punpckhwd mm1, [edx]
-    punpcklwd mm2, [edx+8]
-    punpckhwd mm3, [edx+8]
-    punpcklwd mm4, [edx+16]
-    punpckhwd mm5, [edx+16]
-    punpcklwd mm6, [edx+24]
-    punpckhwd mm7, [edx+24]
-    psrad     mm0, 16
-    psrad     mm1, 16
-    psrad     mm2, 16
-    psrad     mm3, 16
-    psrad     mm4, 16
-    psrad     mm5, 16
-    psrad     mm6, 16
-    psrad     mm7, 16
-    movq      [ecx   ], mm0
-    movq      [ecx+16], mm2
-    movq      [ecx+24], mm3
-    movq      [ecx+32], mm4
-    movq      [ecx+40], mm5
-    movq      [ecx+48], mm6
-    movq      [ecx+56], mm7
-    movq      [ecx+12], mm1
-    movd      [ecx+ 8], mm2
-    ret
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index a23bf761..0c6d463b 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -272,24 +272,21 @@ ADD_NxN_IDCT x264_add16x16_idct8_sse2, x264_add8x8_idct8_sse2, 128, 8, 0,  8
 %endif
 
 
+
 ;-----------------------------------------------------------------------------
-; void x264_zigzag_scan_4x4_field_sse2( int level[16], int16_t dct[4][4] )
+; void x264_zigzag_scan_4x4_field_mmxext( int16_t level[16], int16_t dct[4][4] )
 ;-----------------------------------------------------------------------------
-cglobal x264_zigzag_scan_4x4_field_sse2, 2,2
-    punpcklwd xmm0, [r1]
-    punpckhwd xmm1, [r1]
-    punpcklwd xmm2, [r1+16]
-    punpckhwd xmm3, [r1+16]
-    psrad     xmm0, 16
-    psrad     xmm1, 16
-    psrad     xmm2, 16
-    psrad     xmm3, 16
-    movq   [r0   ], xmm0
-    movdqa [r0+16], xmm1
-    movdqa [r0+32], xmm2
-    movhlps   xmm0, xmm0
-    movdqa [r0+48], xmm3
-    movq   [r0+12], xmm0
-    movd   [r0+ 8], xmm1
+; sse2 is only 1 cycle faster, and ssse3/pshufb is slower on core2
+cglobal x264_zigzag_scan_4x4_field_mmxext, 2,3
+    pshufw     mm0, [r1+4], 0xd2
+    movq       mm1, [r1+16]
+    movq       mm2, [r1+24]
+    movq    [r0+4], mm0
+    movq   [r0+16], mm1
+    movq   [r0+24], mm2
+    mov        r2d, [r1]
+    mov       [r0], r2d
+    mov        r2d, [r1+12]
+    mov    [r0+12], r2d
     RET
 
diff --git a/common/x86/dct.h b/common/x86/dct.h
index 5e93a99f..5b88dbea 100644
--- a/common/x86/dct.h
+++ b/common/x86/dct.h
@@ -46,7 +46,6 @@ void x264_sub16x16_dct8_sse2( int16_t dct[4][8][8], uint8_t *pix1, uint8_t *pix2
 void x264_add8x8_idct8_sse2( uint8_t *dst, int16_t dct[8][8] );
 void x264_add16x16_idct8_sse2( uint8_t *dst, int16_t dct[4][8][8] );
 
-void x264_zigzag_scan_4x4_field_sse2( int level[16], int16_t dct[4][4] );
-void x264_zigzag_scan_4x4_field_mmx( int level[16], int16_t dct[4][4] );
+void x264_zigzag_scan_4x4_field_mmxext( int16_t level[16], int16_t dct[4][4] );
 
 #endif
diff --git a/encoder/cabac.c b/encoder/cabac.c
index e36744e5..c25c65cb 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -707,7 +707,7 @@ static const int last_coeff_flag_offset_8x8[63] = {
 static const int identity[16] =
     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
 
-static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int *l, int i_count )
+static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count )
 {
     const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
     const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
diff --git a/encoder/cavlc.c b/encoder/cavlc.c
index a20c0b0f..ff1aed24 100644
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -63,7 +63,7 @@ static inline void bs_write_vlc( bs_t *s, vlc_t v )
 /****************************************************************************
  * block_residual_write_cavlc:
  ****************************************************************************/
-static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int *l, int i_count )
+static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *l, int i_count )
 {
     int level[16], run[16];
     int i_total, i_trailing;
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index d095b7c2..5f288635 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -26,7 +26,7 @@
 
 
 #define ZIG(i,y,x) level[i] = dct[x][y];
-static inline void zigzag_scan_2x2_dc( int level[4], int16_t dct[2][2] )
+static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[2][2] )
 {
     ZIG(0,0,0)
     ZIG(1,0,1)
@@ -43,7 +43,7 @@ static inline void zigzag_scan_2x2_dc( int level[4], int16_t dct[2][2] )
  *        for the complete mb: if score < 6 -> null
  *  chroma: for the complete mb: if score < 7 -> null
  */
-static int x264_mb_decimate_score( int *dct, int i_max )
+static int x264_mb_decimate_score( int16_t *dct, int i_max )
 {
     static const int i_ds_table4[16] = {
         3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
@@ -618,7 +618,7 @@ int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
 {
     DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
     DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
-    DECLARE_ALIGNED( int,     dctscan[16], 16 );
+    DECLARE_ALIGNED( int16_t, dctscan[16], 16 );
 
     int i_qp = h->mb.i_qp;
     int mvp[2];
diff --git a/encoder/macroblock.h b/encoder/macroblock.h
index 3ca227da..ee69d407 100644
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -65,7 +65,7 @@ static inline int array_non_zero_int( void *v, int i_count )
     return 0;
 }
 
-static inline int array_non_zero_count( int *v, int i_count )
+static inline int array_non_zero_count( int16_t *v, int i_count )
 {
     int i;
     int i_nz;
diff --git a/tools/checkasm.c b/tools/checkasm.c
index 91f80de0..2b947b09 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -347,8 +347,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_zigzag_function_t zigzag_ref;
     x264_zigzag_function_t zigzag_asm;
 
-    int32_t level1[64] __attribute__((aligned(16)));
-    int32_t level2[64] __attribute__((aligned(16)));
+    int16_t level1[64] __attribute__((aligned(16)));
+    int16_t level2[64] __attribute__((aligned(16)));
 
 #define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size )   \
     if( zigzag_asm.name != zigzag_ref.name ) \
@@ -356,7 +356,7 @@ static int check_dct( int cpu_ref, int cpu_new )
         used_asm = 1; \
         call_c( zigzag_c.name, t1, dct ); \
         call_a( zigzag_asm.name, t2, dct ); \
-        if( memcmp( t1, t2, size ) ) \
+        if( memcmp( t1, t2, size*sizeof(int16_t) ) ) \
         { \
             ok = 0; \
             fprintf( stderr, #name " [FAILED]\n" ); \
@@ -371,7 +371,7 @@ static int check_dct( int cpu_ref, int cpu_new )
         memcpy( buf4, buf1, 16*FDEC_STRIDE ); \
         call_c( zigzag_c.name, t1, buf2, buf3 );  \
         call_a( zigzag_asm.name, t2, buf2, buf4 );    \
-        if( memcmp( t1, t2, size )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) )  \
+        if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) )  \
         { \
             ok = 0; \
             fprintf( stderr, #name " [FAILED]\n" ); \
@@ -383,11 +383,11 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_zigzag_init( cpu_new, &zigzag_asm, 0 );
 
     ok = 1; used_asm = 0;
-    TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64*4 );
-    TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16*4  );
-    TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15*4 );
-    TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16*4 );
-    TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15*4 );
+    TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
+    TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16  );
+    TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 );
+    TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
+    TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 );
     report( "zigzag_frame :" );
 
     x264_zigzag_init( 0, &zigzag_c, 1 );
@@ -395,11 +395,11 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_zigzag_init( cpu_new, &zigzag_asm, 1 );
 
     ok = 1; used_asm = 0;
-    TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64*4 );
-    TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16*4  );
-    TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15*4 );
-    TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16*4 );
-    TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15*4 );
+    TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
+    TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16  );
+    TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 );
+    TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
+    TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 );
     report( "zigzag_field :" );
 #undef TEST_ZIGZAG_SCAN
 #undef TEST_ZIGZAG_SUB