From 8245feb264ce7b0ea75a654af9f90d74e45391e8 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson
Date: Thu, 25 Nov 2010 23:05:21 +0100
Subject: [PATCH] Fix possible overflow in sub4x4_dct in 10-bit builds

---
Review notes. Everything between the three-dash marker above and the first
"diff --git" line is commentary: it is not part of the commit message and
not part of the change.

* common/x86/dct-a.asm, DCT_UNPACK (new macro, 3 register arguments):
  punpcklwd/punpckhwd place the low and high words of %1 into the upper
  16 bits of each dword of %3 and %2; psrad by 16 then shifts them down
  arithmetically, leaving sign-extended 32-bit copies. The final
  "SWAP %1, %3" presumably renames the registers so that %1 refers to the
  low-half result (SWAP is defined outside this patch; the stores that
  follow, %1 at the lower address and %2 eight bytes above it, are
  consistent with that reading).

* common/x86/dct-a.asm, sub4x4_dct_mmx: the second DCT4_1D pass and the
  four STORE_DIFF stores are removed. In the replacement, the sum terms
  (commented "s03 + s12" / "s03 - s12") come from SUMSUB_BADC/SUMSUB_BA
  invoked with "w" and are widened with DCT_UNPACK just before being
  stored at [r0+0], [r0+8], [r0+32], [r0+40]. The difference terms are
  widened with DCT_UNPACK first and only then combined by SUMSUB2_AB,
  which is invoked with "d" instead of "w" (presumably selecting dword
  arithmetic; the macro is defined outside this patch), and stored at
  [r0+16], [r0+24], [r0+48], [r0+56].

* common/x86/x86util.asm: the five-argument STORE_DIFF that was guarded by
  "%ifdef HIGH_BIT_DEPTH" is deleted together with the %ifdef/%else/%endif
  lines, so the four-argument STORE_DIFF is now defined unconditionally.

* tools/checkasm.c, check_dct: five 16-row patterns are written to pbuf3
  (stride FENC_STRIDE) and pbuf4 (stride FDEC_STRIDE). Each row sets
  enc[0..1] and enc[2..3] to PIXEL_MAX or 0 according to cond_a/cond_b and
  sets dec[k] = PIXEL_MAX - enc[k] for k = 0..3. TEST_DCT now loops five
  times; each iteration compares the C and asm results on
  &pbuf1[j*64] / &pbuf2[j*64] and then on one of the patterns, and stops
  at the first mismatch. Pattern mismatches are reported with the
  " (overflow)" suffix.

* NOTE(review): the pattern fill writes only columns 0..3 of each row.
  TEST_DCT takes the coefficient count as a parameter, so it may also be
  instantiated for transforms wider than 4 pixels (the call sites are
  outside this patch); for those, columns 4 and up would hold whatever the
  buffers already contained. Confirm that this coverage is intended.

 common/x86/dct-a.asm | 31 +++++++++++++++++++++++-----
 common/x86/x86util.asm | 11 ----------
 tools/checkasm.c | 47 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index e774d229..c94df915 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -77,6 +77,14 @@ cextern pd_32
     SWAP %1, %2, %3
 %endmacro
 
+%macro DCT_UNPACK 3
+    punpcklwd %3, %1
+    punpckhwd %2, %1
+    psrad %3, 16
+    psrad %2, 16
+    SWAP %1, %3
+%endmacro
+
 %ifdef HIGH_BIT_DEPTH
 INIT_XMM
 ;-----------------------------------------------------------------------------
@@ -175,11 +183,24 @@ cglobal sub4x4_dct_mmx, 3,3
     LOAD_DIFF m2, m4, none, [r1+4*FENC_STRIDE], [r2+4*FDEC_STRIDE]
     DCT4_1D 0,1,2,3,4
     TRANSPOSE4x4W 0,1,2,3,4
-    DCT4_1D 0,1,2,3,4
-    STORE_DIFF m0, m4, m5, [r0+ 0], [r0+ 8]
-    STORE_DIFF m1, m4, m5, [r0+16], [r0+24]
-    STORE_DIFF m2, m4, m5, [r0+32], [r0+40]
-    STORE_DIFF m3, m4, m5, [r0+48], [r0+56]
+
+    SUMSUB_BADC w, m3, m0, m2, m1
+    SUMSUB_BA w, m2, m3, m4
+    DCT_UNPACK m2, m4, m5
+    DCT_UNPACK m3, m6, m7
+    mova [r0+ 0], m2 ; s03 + s12
+    mova [r0+ 8], m4
+    mova [r0+32], m3 ; s03 - s12
+    mova [r0+40], m6
+
+    DCT_UNPACK m0, m2, m4
+    DCT_UNPACK m1, m3, m5
+    SUMSUB2_AB d, m0, m1, m4
+    SUMSUB2_AB d, m2, m3, m5
+    mova [r0+16], m0 ; d03*2 + d12
+    mova [r0+24], m2
+    mova [r0+48], m4 ; d03 - 2*d12
+    mova [r0+56], m5
     RET
 
 %else
diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
index 124e0a66..2a2f48d2 100644
--- a/common/x86/x86util.asm
+++ b/common/x86/x86util.asm
@@ -584,16 +584,6 @@
     packuswb %2, %1
 %endmacro
 
-%ifdef HIGH_BIT_DEPTH
-%macro STORE_DIFF 5
-    punpcklwd %2, %1
-    punpckhwd %3, %1
-    psrad %2, 16
-    psrad %3, 16
-    mova %4, %2
-    mova %5, %3
-%endmacro
-%else
 %macro STORE_DIFF 4
     movh %2, %4
     punpcklbw %2, %3
@@ -602,7 +592,6 @@
     packuswb %1, %1
     movh %4, %1
 %endmacro
-%endif
 
 %macro CLIPW 3 ;(dst, min, max)
     pmaxsw %1, %2
diff --git a/tools/checkasm.c b/tools/checkasm.c
index 7c7ab372..1b45efc5 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -556,17 +556,54 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_cqm_init( h );
     x264_quant_init( h, 0, &qf );
 
+    /* overflow test cases */
+    for( int i = 0; i < 5; i++ )
+    {
+        pixel *enc = &pbuf3[16*i*FENC_STRIDE];
+        pixel *dec = &pbuf4[16*i*FDEC_STRIDE];
+
+        for( int j = 0; j < 16; j++ )
+        {
+            int cond_a = (i < 2) ? 1 : ((j&3) == 0 || (j&3) == (i-1));
+            int cond_b = (i == 0) ? 1 : !cond_a;
+            enc[0] = enc[1] = cond_a ? PIXEL_MAX : 0;
+            enc[2] = enc[3] = cond_b ? PIXEL_MAX : 0;
+
+            for( int k = 0; k < 4; k++ )
+                dec[k] = PIXEL_MAX - enc[k];
+
+            enc += FENC_STRIDE;
+            dec += FDEC_STRIDE;
+        }
+    }
+
 #define TEST_DCT( name, t1, t2, size ) \
     if( dct_asm.name != dct_ref.name ) \
     { \
         set_func_name( #name ); \
         used_asm = 1; \
-        call_c( dct_c.name, t1, pbuf1, pbuf2 ); \
-        call_a( dct_asm.name, t2, pbuf1, pbuf2 ); \
-        if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
+        pixel *enc = pbuf3; \
+        pixel *dec = pbuf4; \
+        for( int j = 0; j < 5; j++) \
         { \
-            ok = 0; \
-            fprintf( stderr, #name " [FAILED]\n" ); \
+            call_c( dct_c.name, t1, &pbuf1[j*64], &pbuf2[j*64] ); \
+            call_a( dct_asm.name, t2, &pbuf1[j*64], &pbuf2[j*64] ); \
+            if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
+            { \
+                ok = 0; \
+                fprintf( stderr, #name " [FAILED]\n" ); \
+                break; \
+            } \
+            call_c( dct_c.name, t1, enc, dec ); \
+            call_a( dct_asm.name, t2, enc, dec ); \
+            if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
+            { \
+                ok = 0; \
+                fprintf( stderr, #name " [FAILED] (overflow)\n" ); \
+                break; \
+            } \
+            enc += 16*FENC_STRIDE; \
+            dec += 16*FDEC_STRIDE; \
         } \
     }
     ok = 1; used_asm = 0;
-- 
2.40.0