From: Loren Merritt Date: Wed, 26 Oct 2005 06:31:35 +0000 (+0000) Subject: checkasm: check 8x8dct, mc average, quant, and SSE2. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6347263823e0fce26593fe36d812ba95931ebcb0;p=libx264 checkasm: check 8x8dct, mc average, quant, and SSE2. git-svn-id: svn://svn.videolan.org/x264/trunk@338 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/common/set.h b/common/set.h index c17e95f8..74fa64f2 100644 --- a/common/set.h +++ b/common/set.h @@ -211,6 +211,12 @@ static const uint8_t x264_cqm_flat16[64] = 16,16,16,16,16,16,16,16, 16,16,16,16,16,16,16,16 }; +static const uint8_t * const x264_cqm_jvt[6] = +{ + x264_cqm_jvt4i, x264_cqm_jvt4p, + x264_cqm_jvt4i, x264_cqm_jvt4p, + x264_cqm_jvt8i, x264_cqm_jvt8p +}; void x264_cqm_init( x264_t *h ); int x264_cqm_parse_file( x264_t *h, const char *filename ); diff --git a/encoder/set.c b/encoder/set.c index 98779898..7b79efc4 100644 --- a/encoder/set.c +++ b/encoder/set.c @@ -38,13 +38,6 @@ #include "config.h" #endif -static const uint8_t *const x264_cqm_jvt[6] = -{ - x264_cqm_jvt4i, x264_cqm_jvt4p, - x264_cqm_jvt4i, x264_cqm_jvt4p, - x264_cqm_jvt8i, x264_cqm_jvt8p -}; - static void scaling_list_write( bs_t *s, x264_pps_t *pps, int idx ) { const int len = idx<4 ? 16 : 64; diff --git a/testing/checkasm.c b/testing/checkasm.c index 58a403cd..4b621d96 100644 --- a/testing/checkasm.c +++ b/testing/checkasm.c @@ -2,8 +2,8 @@ #include #include -# #include "common/common.h" +#include "common/cpu.h" #ifdef HAVE_MMXEXT #include "common/i386/pixel.h" #include "common/i386/dct.h" @@ -18,159 +18,125 @@ uint8_t * buf1, * buf2; /* buf3, buf4: used to store output */ uint8_t * buf3, * buf4; +/* buf5: temp */ +uint8_t * buf5; -static int check_pixel() +#define report( name ) { \ + if( used_asm ) \ + fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \ + if( !ok ) ret = -1; \ +} + +static int check_pixel( int cpu_ref, int cpu_new ) { - x264_pixel_function_t pixel_c = {{0},{0},{0}}; - x264_pixel_function_t pixel_asm = {{0}, {0},{0}}; - int ret = 0, ok; + x264_pixel_function_t pixel_c; + x264_pixel_function_t pixel_ref; + x264_pixel_function_t pixel_asm; + int ret = 0, ok, used_asm; int i; - memset( &pixel_asm, 0, sizeof( x264_pixel_function_t ) ); x264_pixel_init( 0, &pixel_c ); -#ifdef HAVE_MMXEXT - x264_pixel_init( X264_CPU_MMX|X264_CPU_MMXEXT, &pixel_asm ); -#endif -#ifdef ARCH_PPC - x264_pixel_altivec_init( &pixel_asm ); -#endif - - for( i = 0, ok = 1; i < 7; i++ ) - { - int res_c, res_asm; - if( pixel_asm.sad[i] ) - { - res_c = pixel_c.sad[i]( buf1, 32, buf2, 32 ); - res_asm = pixel_asm.sad[i]( buf1, 32, buf2, 32 ); - if( res_c != res_asm ) - { - ok = 0; - fprintf( stderr, "sad[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); - } - } - } - if( ok ) - fprintf( stderr, " - pixel sad : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - pixel sat : [FAILED]\n" ); - } + x264_pixel_init( cpu_ref, &pixel_ref ); + x264_pixel_init( cpu_new, &pixel_asm ); - for( i = 0, ok = 1; i < 7; i++ ) - { - int res_c, res_asm; - if( pixel_asm.satd[i] ) - { - res_c = pixel_c.satd[i]( buf1, 32, buf2, 32 ); - res_asm = pixel_asm.satd[i]( buf1, 32, buf2, 32 ); - if( res_c != res_asm ) - { - ok = 0; - fprintf( stderr, "satd[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); - } - } - } - - if( ok ) - fprintf( stderr, " - pixel satd : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - pixel satd : [FAILED]\n" ); - } - - for( i = 0, ok = 1; i < 7; i++ ) - { - if( pixel_asm.avg[i] ) - { - memcpy( buf3, buf1, 32*32 ); - memcpy( buf4, buf1, 32*32 ); - pixel_c.satd[i]( buf3, 32, buf2, 32 ); - pixel_asm.satd[i]( buf4, 32, buf2, 32 ); - if( memcmp( buf3, buf4, 32*32 ) ) - { - ok = 0; - fprintf( stderr, "avg[%d]: [FAILED]\n", i ); - } - } - } +#define TEST_PIXEL( name ) \ + for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \ + { \ + int res_c, res_asm; \ + if( pixel_asm.name[i] != pixel_ref.name[i] ) \ + { \ + used_asm = 1; \ + res_c = pixel_c.name[i]( buf1, 32, buf2, 24 ); \ + res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \ + if( res_c != res_asm ) \ + { \ + ok = 0; \ + fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \ + } \ + } \ + } \ + report( "pixel " #name " :" ); - if( ok ) - fprintf( stderr, " - pixel avg : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - pixel avg : [FAILED]\n" ); - } + TEST_PIXEL( sad ); + TEST_PIXEL( ssd ); + TEST_PIXEL( satd ); return ret; } -static int check_dct() +static int check_dct( int cpu_ref, int cpu_new ) { x264_dct_function_t dct_c; + x264_dct_function_t dct_ref; x264_dct_function_t dct_asm; - int ret = 0, ok; + int ret = 0, ok, used_asm; int16_t dct1[16][4][4] __attribute((aligned(16))); int16_t dct2[16][4][4] __attribute((aligned(16))); - memset( &dct_asm, 0, sizeof( dct_asm ) ); x264_dct_init( 0, &dct_c ); -#ifdef HAVE_MMXEXT - x264_dct_init( X264_CPU_MMX|X264_CPU_MMXEXT, &dct_asm ); -#endif + x264_dct_init( cpu_ref, &dct_ref); + x264_dct_init( cpu_new, &dct_asm ); #define TEST_DCT( name, t1, t2, size ) \ - if( dct_asm.name ) \ + if( dct_asm.name != dct_ref.name ) \ { \ - dct_c.name( t1, buf1, 32, buf2, 32 ); \ - dct_asm.name( t2, buf1, 32, buf2, 32 ); \ + used_asm = 1; \ + dct_c.name( t1, buf1, 32, buf2, 24 ); \ + dct_asm.name( t2, buf1, 32, buf2, 24 ); \ if( memcmp( t1, t2, size ) ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ } \ } - ok = 1; + ok = 1; used_asm = 0; TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 ); TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 ); TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 ); - if( ok ) - fprintf( stderr, " - sub_dctXxX : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - sub_dctXxX : [FAILED]\n" ); - } + report( "sub_dct4 :" ); + + ok = 1; used_asm = 0; + TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 ); + TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 ); + report( "sub_dct8 :" ); #undef TEST_DCT -#define TEST_IDCT( name, t ) \ - if( dct_asm.name ) \ + /* copy coefs because idct8 modifies them in place */ + memcpy( buf5, dct1, 512 ); + +#define TEST_IDCT( name ) \ + if( dct_asm.name != dct_ref.name ) \ { \ + used_asm = 1; \ memcpy( buf3, buf1, 32*32 ); \ memcpy( buf4, buf1, 32*32 ); \ - dct_c.name( buf3, 32, t ); \ - dct_asm.name( buf4, 32, t ); \ + memcpy( dct1, buf5, 512 ); \ + memcpy( dct2, buf5, 512 ); \ + dct_c.name( buf3, 32, (void*)dct1 ); \ + dct_asm.name( buf4, 32, (void*)dct2 ); \ if( memcmp( buf3, buf4, 32*32 ) ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ } \ } - ok = 1; - TEST_IDCT( add4x4_idct, dct1[0] ); - TEST_IDCT( add8x8_idct, dct1 ); - TEST_IDCT( add16x16_idct, dct1 ); - if( ok ) - fprintf( stderr, " - add_idctXxX : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - add_idctXxX : [FAILED]\n" ); - } + ok = 1; used_asm = 0; + TEST_IDCT( add4x4_idct ); + TEST_IDCT( add8x8_idct ); + TEST_IDCT( add16x16_idct ); + report( "add_idct4 :" ); + + ok = 1; used_asm = 0; + TEST_IDCT( add8x8_idct8 ); + TEST_IDCT( add16x16_idct8 ); + report( "add_idct8 :" ); #undef TEST_IDCT - ok = 1; - if( dct_asm.dct4x4dc ) + ok = 1; used_asm = 0; + if( dct_asm.dct4x4dc != dct_ref.dct4x4dc ) { int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; - + used_asm = 1; dct_c.dct4x4dc( dct1 ); dct_asm.dct4x4dc( dct2 ); if( memcmp( dct1, dct2, 32 ) ) @@ -179,11 +145,11 @@ static int check_dct() fprintf( stderr, " - dct4x4dc : [FAILED]\n" ); } } - if( dct_asm.idct4x4dc ) + if( dct_asm.dct4x4dc != dct_ref.dct4x4dc ) { int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; - + used_asm = 1; dct_c.idct4x4dc( dct1 ); dct_asm.idct4x4dc( dct2 ); if( memcmp( dct1, dct2, 32 ) ) @@ -192,19 +158,14 @@ static int check_dct() fprintf( stderr, " - idct4x4dc : [FAILED]\n" ); } } - if( ok ) - fprintf( stderr, " - (i)dct4x4dc : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - (i)dct4x4dc : [FAILED]\n" ); - } + report( "(i)dct4x4dc :" ); - ok = 1; - if( dct_asm.dct2x2dc ) + ok = 1; used_asm = 0; + if( dct_asm.dct2x2dc != dct_ref.dct2x2dc ) { int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; - + used_asm = 1; dct_c.dct2x2dc( dct1 ); dct_asm.dct2x2dc( dct2 ); if( memcmp( dct1, dct2, 4*2 ) ) @@ -213,11 +174,11 @@ static int check_dct() fprintf( stderr, " - dct2x2dc : [FAILED]\n" ); } } - if( dct_asm.idct2x2dc ) + if( dct_asm.idct2x2dc != dct_ref.idct2x2dc ) { int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; - + used_asm = 1; dct_c.idct2x2dc( dct1 ); dct_asm.idct2x2dc( dct2 ); if( memcmp( dct1, dct2, 4*2 ) ) @@ -226,22 +187,16 @@ static int check_dct() fprintf( stderr, " - idct2x2dc : [FAILED]\n" ); } } - - if( ok ) - fprintf( stderr, " - (i)dct2x2dc : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - (i)dct2x2dc : [FAILED]\n" ); - } - + report( "(i)dct2x2dc :" ); return ret; } -static int check_mc() +static int check_mc( int cpu_ref, int cpu_new ) { - x264_mc_functions_t mc_c = {0,0,0}; - x264_mc_functions_t mc_a = {0,0,0}; + x264_mc_functions_t mc_c; + x264_mc_functions_t mc_ref; + x264_mc_functions_t mc_a; uint8_t *src = &buf1[2*32+2]; uint8_t *src2[4] = { &buf1[2*32+2], &buf1[7*32+2], @@ -249,20 +204,17 @@ static int check_mc() uint8_t *dst1 = &buf3[2*32+2]; uint8_t *dst2 = &buf4[2*32+2]; - int dx, dy; - int ret = 0, ok[2] = { 1, 1 }; + int dx, dy, i, w; + int ret = 0, ok, used_asm; x264_mc_init( 0, &mc_c ); -#ifdef HAVE_MMXEXT - x264_mc_mmxext_init( &mc_a ); -#endif -#ifdef ARCH_PPC - x264_mc_altivec_init( &mc_a ); -#endif + x264_mc_init( cpu_ref, &mc_ref ); + x264_mc_init( cpu_new, &mc_a ); #define MC_TEST_LUMA( w, h ) \ - if( mc_a.mc_luma ) \ + if( mc_a.mc_luma != mc_ref.mc_luma ) \ { \ + used_asm = 1; \ memset(buf3, 0xCD, 1024); \ memset(buf4, 0xCD, 1024); \ mc_c.mc_luma( src2, 32, dst1, 16, dx, dy, w, h ); \ @@ -270,13 +222,14 @@ static int check_mc() if( memcmp( buf3, buf4, 1024 ) ) \ { \ fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \ - ok[0] = 0; \ + ok = 0; \ } \ } #define MC_TEST_CHROMA( w, h ) \ - if( mc_a.mc_chroma ) \ + if( mc_a.mc_chroma != mc_ref.mc_chroma ) \ { \ + used_asm = 1; \ memset(buf3, 0xCD, 1024); \ memset(buf4, 0xCD, 1024); \ mc_c.mc_chroma( src, 32, dst1, 16, dx, dy, w, h ); \ @@ -284,12 +237,11 @@ static int check_mc() if( memcmp( buf3, buf4, 1024 ) ) \ { \ fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \ - ok[1] = 0; \ + ok = 0; \ } \ } - + ok = 1; used_asm = 0; for( dy = 0; dy < 4; dy++ ) - { for( dx = 0; dx < 4; dx++ ) { MC_TEST_LUMA( 16, 16 ); @@ -299,7 +251,13 @@ static int check_mc() MC_TEST_LUMA( 8, 4 ); MC_TEST_LUMA( 4, 8 ); MC_TEST_LUMA( 4, 4 ); + } + report( "mc luma :" ); + ok = 1; used_asm = 0; + for( dy = 0; dy < 9; dy++ ) + for( dx = 0; dx < 9; dx++ ) + { MC_TEST_CHROMA( 8, 8 ); MC_TEST_CHROMA( 8, 4 ); MC_TEST_CHROMA( 4, 8 ); @@ -308,52 +266,146 @@ static int check_mc() MC_TEST_CHROMA( 2, 4 ); MC_TEST_CHROMA( 2, 2 ); } - } + report( "mc chroma :" ); #undef MC_TEST_LUMA #undef MC_TEST_CHROMA - if( ok[0] ) - fprintf( stderr, " - mc luma : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - mc luma : [FAILED]\n" ); + +#define MC_TEST_AVG( name, ... ) \ + for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \ + { \ + memcpy( buf3, buf1, 1024 ); \ + memcpy( buf4, buf1, 1024 ); \ + if( mc_a.name[i] != mc_ref.name[i] ) \ + { \ + used_asm = 1; \ + mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \ + mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \ + if( memcmp( buf3, buf4, 1024 ) ) \ + { \ + ok = 0; \ + fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \ + } \ + } \ } - if( ok[1] ) - fprintf( stderr, " - mc chroma : [OK]\n" ); - else { - ret = -1; - fprintf( stderr, " - mc chroma : [FAILED]\n" ); + MC_TEST_AVG( avg ); + report( "mc avg :" ); + for( w = -64; w <= 128 && ok; w++ ) + MC_TEST_AVG( avg_weight, w ); + report( "mc wpredb :" ); + + return ret; +} + +static int check_quant( int cpu_ref, int cpu_new ) +{ + x264_quant_function_t qf_c; + x264_quant_function_t qf_ref; + x264_quant_function_t qf_a; + int16_t dct1[64], dct2[64]; + uint8_t cqm_buf[64]; + int ret = 0, ok = 1, used_asm = 0; + int i, i_cqm; + x264_t h_buf; + x264_t *h = &h_buf; + h->pps = h->pps_array; + x264_param_default( &h->param ); + + for( i_cqm = 0; i_cqm < 4; i_cqm++ ) + { + if( i_cqm == 0 ) + for( i = 0; i < 6; i++ ) + h->pps->scaling_list[i] = x264_cqm_flat16; + else if( i_cqm == 1 ) + for( i = 0; i < 6; i++ ) + h->pps->scaling_list[i] = x264_cqm_jvt[i]; + else + { + if( i_cqm == 2 ) + for( i = 0; i < 64; i++ ) + cqm_buf[i] = 10 + rand() % 246; + else + for( i = 0; i < 64; i++ ) + cqm_buf[i] = 1; + for( i = 0; i < 6; i++ ) + h->pps->scaling_list[i] = cqm_buf; + } + + x264_cqm_init( h ); + x264_quant_init( h, 0, &qf_c ); + x264_quant_init( h, cpu_ref, &qf_ref ); + x264_quant_init( h, cpu_new, &qf_a ); + +#define TEST_QUANT( name, cqm ) \ + if( qf_a.name != qf_ref.name ) \ + { \ + used_asm = 1; \ + for( i = 0; i < 64; i++ ) \ + dct1[i] = dct2[i] = rand() & 0xfff; \ + qf_c.name( (void*)dct1, cqm, 20, (1<<20)/6 ); \ + qf_a.name( (void*)dct2, cqm, 20, (1<<20)/6 ); \ + if( memcmp( dct1, dct2, 64*2 ) ) \ + { \ + ok = 0; \ + fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \ + } \ + } + + TEST_QUANT( quant_8x8_core, *h->quant8_mf[CQM_8IY] ); + TEST_QUANT( quant_8x8_core, *h->quant8_mf[CQM_8PY] ); + TEST_QUANT( quant_4x4_core, *h->quant4_mf[CQM_4IY] ); + TEST_QUANT( quant_4x4_core, *h->quant4_mf[CQM_4PY] ); + TEST_QUANT( quant_4x4_dc_core, ***h->quant4_mf[CQM_4IY] ); + TEST_QUANT( quant_2x2_dc_core, ***h->quant4_mf[CQM_4IC] ); } + + report( "quant :" ); + return ret; } +int check_all( int cpu_ref, int cpu_new ) +{ + return check_pixel( cpu_ref, cpu_new ) + + check_dct( cpu_ref, cpu_new ) + + check_mc( cpu_ref, cpu_new ) + + check_quant( cpu_ref, cpu_new ); +} + int main() { - int ret; + int ret = 0; int i; -#ifdef HAVE_MMXEXT - fprintf( stderr, "x264: MMXEXT against C\n" ); -#elif ARCH_PPC - fprintf( stderr, "x264: ALTIVEC against C\n" ); -#endif - buf1 = x264_malloc( 1024 ); /* 32 x 32 */ buf2 = x264_malloc( 1024 ); buf3 = x264_malloc( 1024 ); buf4 = x264_malloc( 1024 ); + buf5 = x264_malloc( 1024 ); srand( x264_mdate() ); for( i = 0; i < 1024; i++ ) { - buf1[i] = rand() % 0xFF; - buf2[i] = rand() % 0xFF; + buf1[i] = rand() & 0xFF; + buf2[i] = rand() & 0xFF; buf3[i] = buf4[i] = 0; } - ret = check_pixel() + - check_dct() + - check_mc(); +#ifdef HAVE_MMXEXT + fprintf( stderr, "x264: MMXEXT against C\n" ); + ret = check_all( 0, X264_CPU_MMX | X264_CPU_MMXEXT ); +#ifdef HAVE_SSE2 + if( x264_cpu_detect() & X264_CPU_SSE2 ) + { + fprintf( stderr, "\nx264: SSE2 against C\n" ); + ret |= check_all( X264_CPU_MMX | X264_CPU_MMXEXT, + X264_CPU_MMX | X264_CPU_MMXEXT | X264_CPU_SSE | X264_CPU_SSE2 ); + } +#endif +#elif ARCH_PPC + fprintf( stderr, "x264: ALTIVEC against C\n" ); + ret = check_all( 0, X264_CPU_ALTIVEC ); +#endif if( ret == 0 ) {