if( !ok ) ret = -1; \
}
+/* Detect when an asm function fails to save/restore callee-saved registers.
+ * This needs an explicit asm-level check because a clobbered callee-saved
+ * register only sometimes crashes the caller in normal use, so the bug can
+ * slip through ordinary functional testing. */
+#define call_c(func,...) func(__VA_ARGS__)
+#ifdef ARCH_X86
+/* Checked-call trampoline (implemented in asm elsewhere): invokes func with
+ * the given arguments while verifying the callee-saved registers around the
+ * call. NOTE(review): presumably it clears *ok when a register was
+ * clobbered — confirm against the asm implementation. Returns func's
+ * result widened to long. */
+long x264_checkasm_call( long (*func)(), int *ok, ... );
+/* call_a routes asm-function invocations through the checked trampoline;
+ * it expects a local `int ok` to be in scope at every call site. */
+#define call_a(func,...) x264_checkasm_call((long(*)())func, &ok, __VA_ARGS__)
+#else
+/* No trampoline on this arch: checked calls degrade to plain calls. */
+#define call_a call_c
+#endif
+
static int check_pixel( int cpu_ref, int cpu_new )
{
x264_pixel_function_t pixel_c;
for( j=0; j<64; j++ ) \
{ \
used_asm = 1; \
- res_c = pixel_c.name[i]( buf1, 32, buf2+j, 16 ); \
- res_asm = pixel_asm.name[i]( buf1, 32, buf2+j, 16 ); \
+ res_c = call_c( pixel_c.name[i], buf1, 32, buf2+j, 16 ); \
+ res_asm = call_a( pixel_asm.name[i], buf1, 32, buf2+j, 16 ); \
if( res_c != res_asm ) \
{ \
ok = 0; \
if(N==4) \
{ \
res_c[3] = pixel_c.sad[i]( buf1, 16, pix2+99, 32 ); \
- pixel_asm.sad_x4[i]( buf1, pix2, pix2+30, pix2+1, pix2+99, 32, res_asm ); \
+ call_a( pixel_asm.sad_x4[i], buf1, pix2, pix2+30, pix2+1, pix2+99, 32, res_asm ); \
} \
else \
- pixel_asm.sad_x3[i]( buf1, pix2, pix2+30, pix2+1, 32, res_asm ); \
+ call_a( pixel_asm.sad_x3[i], buf1, pix2, pix2+30, pix2+1, 32, res_asm ); \
if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
{ \
ok = 0; \
pred[i]( buf3+40, ##__VA_ARGS__ ); \
res_c[i] = pixel_c.satd( buf1+40, 16, buf3+40, 32 ); \
} \
- pixel_asm.name( buf1+40, i8x8 ? edge : buf3+40, res_asm ); \
+ call_a( pixel_asm.name, buf1+40, i8x8 ? edge : buf3+40, res_asm ); \
if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
{ \
ok = 0; \
ok = 1; used_asm = 0;
for( i=0; i<32; i++ )
cost_mv[i] = i*10;
- for( i=0; i<100; i++ )
+ for( i=0; i<100 && ok; i++ )
if( pixel_asm.ads[i&3] != pixel_ref.ads[i&3] )
{
DECLARE_ALIGNED( uint16_t, sums[72], 16 );
for( j=0; j<4; j++ )
dc[j] = rand() & 0x3fff;
used_asm = 1;
- mvn_c = pixel_c.ads[i&3]( dc, sums, 32, cost_mv, mvs_c, 28, thresh );
- mvn_a = pixel_asm.ads[i&3]( dc, sums, 32, cost_mv, mvs_a, 28, thresh );
+ mvn_c = call_c( pixel_c.ads[i&3], dc, sums, 32, cost_mv, mvs_c, 28, thresh );
+ mvn_a = call_a( pixel_asm.ads[i&3], dc, sums, 32, cost_mv, mvs_a, 28, thresh );
if( mvn_c != mvn_a || memcmp( mvs_c, mvs_a, mvn_c*sizeof(*mvs_c) ) )
{
ok = 0;
if( dct_asm.name != dct_ref.name ) \
{ \
used_asm = 1; \
- dct_c.name( t1, buf1, buf2 ); \
- dct_asm.name( t2, buf1, buf2 ); \
+ call_c( dct_c.name, t1, buf1, buf2 ); \
+ call_a( dct_asm.name, t2, buf1, buf2 ); \
if( memcmp( t1, t2, size ) ) \
{ \
ok = 0; \
memcpy( buf4, buf1, 32*32 ); \
memcpy( dct1, src, 512 ); \
memcpy( dct2, src, 512 ); \
- dct_c.name( buf3, (void*)dct1 ); \
- dct_asm.name( buf4, (void*)dct2 ); \
+ call_c( dct_c.name, buf3, (void*)dct1 ); \
+ call_a( dct_asm.name, buf4, (void*)dct2 ); \
if( memcmp( buf3, buf4, 32*32 ) ) \
{ \
ok = 0; \
int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
used_asm = 1;
- dct_c.dct4x4dc( dct1 );
- dct_asm.dct4x4dc( dct2 );
+ call_c( dct_c.dct4x4dc, dct1 );
+ call_a( dct_asm.dct4x4dc, dct2 );
if( memcmp( dct1, dct2, 32 ) )
{
ok = 0;
int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
used_asm = 1;
- dct_c.idct4x4dc( dct1 );
- dct_asm.idct4x4dc( dct2 );
+ call_c( dct_c.idct4x4dc, dct1 );
+ call_a( dct_asm.idct4x4dc, dct2 );
if( memcmp( dct1, dct2, 32 ) )
{
ok = 0;
int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
used_asm = 1;
- dct_c.dct2x2dc( dct1 );
- dct_asm.dct2x2dc( dct2 );
+ call_c( dct_c.dct2x2dc, dct1 );
+ call_a( dct_asm.dct2x2dc, dct2 );
if( memcmp( dct1, dct2, 4*2 ) )
{
ok = 0;
int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
used_asm = 1;
- dct_c.idct2x2dc( dct1 );
- dct_asm.idct2x2dc( dct2 );
+ call_c( dct_c.idct2x2dc, dct1 );
+ call_a( dct_asm.idct2x2dc, dct2 );
if( memcmp( dct1, dct2, 4*2 ) )
{
ok = 0;
if( zigzag_asm.name != zigzag_ref.name ) \
{ \
used_asm = 1; \
- zigzag_c.name( t1, dct ); \
- zigzag_asm.name( t2, dct ); \
+ call_c( zigzag_c.name, t1, dct ); \
+ call_a( zigzag_asm.name, t2, dct ); \
if( memcmp( t1, t2, size ) ) \
{ \
ok = 0; \
used_asm = 1; \
memcpy( buf3, buf1, 16*FDEC_STRIDE ); \
memcpy( buf4, buf1, 16*FDEC_STRIDE ); \
- zigzag_c.name( t1, buf2, buf3 ); \
- zigzag_asm.name( t2, buf2, buf4 ); \
+ call_c( zigzag_c.name, t1, buf2, buf3 ); \
+ call_a( zigzag_asm.name, t2, buf2, buf4 ); \
if( memcmp( t1, t2, size )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) ) \
{ \
ok = 0; \
used_asm = 1; \
memset(buf3, 0xCD, 1024); \
memset(buf4, 0xCD, 1024); \
- mc_c.mc_luma( dst1, 32, src2, 16, dx, dy, w, h ); \
- mc_a.mc_luma( dst2, 32, src2, 16, dx, dy, w, h ); \
+ call_c( mc_c.mc_luma, dst1, 32, src2, 16, dx, dy, w, h ); \
+ call_a( mc_a.mc_luma, dst2, 32, src2, 16, dx, dy, w, h ); \
if( memcmp( buf3, buf4, 1024 ) ) \
{ \
fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
used_asm = 1; \
memset(buf3, 0xCD, 1024); \
memset(buf4, 0xCD, 1024); \
- mc_c.mc_luma( dst1, 32, src2, 16, dx, dy, w, h ); \
- ref = mc_a.get_ref( ref, &ref_stride, src2, 16, dx, dy, w, h ); \
+ call_c( mc_c.mc_luma, dst1, 32, src2, 16, dx, dy, w, h ); \
+ ref = (uint8_t*) call_a( mc_a.get_ref, ref, &ref_stride, src2, 16, dx, dy, w, h ); \
for( i=0; i<h; i++ ) \
if( memcmp( dst1+i*32, ref+i*ref_stride, w ) ) \
{ \
used_asm = 1; \
memset(buf3, 0xCD, 1024); \
memset(buf4, 0xCD, 1024); \
- mc_c.mc_chroma( dst1, 16, src, 32, dx, dy, w, h ); \
- mc_a.mc_chroma( dst2, 16, src, 32, dx, dy, w, h ); \
+ call_c( mc_c.mc_chroma, dst1, 16, src, 32, dx, dy, w, h ); \
+ call_a( mc_a.mc_chroma, dst2, 16, src, 32, dx, dy, w, h ); \
/* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\
for( j=0; j<h; j++ ) \
for( i=w; i<4; i++ ) \
if( mc_a.name[i] != mc_ref.name[i] ) \
{ \
used_asm = 1; \
- mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
- mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
+ call_c( mc_c.name[i], buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
+ call_a( mc_a.name[i], buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
if( memcmp( buf3, buf4, 1024 ) ) \
{ \
ok = 0; \
ok = 1; used_asm = 1;
memset( buf3, 0, 4096 );
memset( buf4, 0, 4096 );
- mc_c.hpel_filter( dstc[0], dstc[1], dstc[2], src, 64, 48, 10 );
- mc_a.hpel_filter( dsta[0], dsta[1], dsta[2], src, 64, 48, 10 );
+ call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], src, 64, 48, 10 );
+ call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], src, 64, 48, 10 );
for( i=0; i<3; i++ )
for( j=0; j<10; j++ )
//FIXME ideally the first pixels would match too, but they aren't actually used
if( db_a.name != db_ref.name ) \
{ \
used_asm = 1; \
- db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
- db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+ call_c( db_c.name, &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+ call_a( db_a.name, &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
if( memcmp( buf3, buf4, 1024 ) ) \
{ \
ok = 0; \
{ \
for( i = 0; i < 16; i++ ) \
dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
- qf_c.name( (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
- qf_a.name( (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
+ call_c( qf_c.name, (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
+ call_a( qf_a.name, (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
if( memcmp( dct1, dct2, 16*2 ) ) \
{ \
oks[0] = 0; \
for( qp = 51; qp > 0; qp-- ) \
{ \
INIT_QUANT##w() \
- qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
- qf_a.qname( (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+ call_c( qf_c.qname, (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+ call_a( qf_a.qname, (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
if( memcmp( dct1, dct2, w*w*2 ) ) \
{ \
oks[0] = 0; \
for( qp = 51; qp > 0; qp-- ) \
{ \
INIT_QUANT##w() \
- qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+ call_c( qf_c.qname, (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
memcpy( dct2, dct1, w*w*2 ); \
- qf_c.dqname( (void*)dct1, h->dequant##w##_mf[block], qp ); \
- qf_a.dqname( (void*)dct2, h->dequant##w##_mf[block], qp ); \
+ call_c( qf_c.dqname, (void*)dct1, h->dequant##w##_mf[block], qp ); \
+ call_a( qf_a.dqname, (void*)dct2, h->dequant##w##_mf[block], qp ); \
if( memcmp( dct1, dct2, w*w*2 ) ) \
{ \
oks[1] = 0; \
used_asm = 1; \
memcpy( buf3, buf1, 32*20 );\
memcpy( buf4, buf1, 32*20 );\
- ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\
- ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\
+ call_c( ip_c.name[dir], buf3+48, ##__VA_ARGS__ );\
+ call_a( ip_a.name[dir], buf4+48, ##__VA_ARGS__ );\
if( memcmp( buf3, buf4, 32*20 ) )\
{\
fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\