From: George Stephanos Date: Sun, 2 Jan 2011 16:26:10 +0000 (-0500) Subject: SSE2 high bit depth intra_predict_(8x8c|16x16)_p X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=770718bc498bbc215c3f0876013de2b2b3c1db32;p=libx264 SSE2 high bit depth intra_predict_(8x8c|16x16)_p Patch from Google Code-In. --- diff --git a/common/predict.c b/common/predict.c index 20a57e59..4e0a532a 100644 --- a/common/predict.c +++ b/common/predict.c @@ -119,7 +119,7 @@ void x264_predict_16x16_v_c( pixel *src ) src += FDEC_STRIDE; } } -static void x264_predict_16x16_p_c( pixel *src ) +void x264_predict_16x16_p_c( pixel *src ) { int H = 0, V = 0; @@ -269,7 +269,7 @@ void x264_predict_8x8c_v_c( pixel *src ) src += FDEC_STRIDE; } } -static void x264_predict_8x8c_p_c( pixel *src ) +void x264_predict_8x8c_p_c( pixel *src ) { int H = 0, V = 0; diff --git a/common/predict.h b/common/predict.h index b6489cd0..c0543312 100644 --- a/common/predict.h +++ b/common/predict.h @@ -118,9 +118,11 @@ void x264_predict_4x4_v_c ( pixel *src ); void x264_predict_16x16_dc_c( pixel *src ); void x264_predict_16x16_h_c ( pixel *src ); void x264_predict_16x16_v_c ( pixel *src ); +void x264_predict_16x16_p_c ( pixel *src ); void x264_predict_8x8c_dc_c ( pixel *src ); void x264_predict_8x8c_h_c ( pixel *src ); void x264_predict_8x8c_v_c ( pixel *src ); +void x264_predict_8x8c_p_c ( pixel *src ); void x264_predict_16x16_init ( int cpu, x264_predict_t pf[7] ); void x264_predict_8x8c_init ( int cpu, x264_predict_t pf[7] ); diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm index 1f5fd7dd..a5eaaf73 100644 --- a/common/x86/predict-a.asm +++ b/common/x86/predict-a.asm @@ -47,6 +47,7 @@ cextern pw_4 cextern pw_8 cextern pw_ff00 cextern pb_reverse +cextern pw_pixel_max %macro STORE8x8 2 add r0, 4*FDEC_STRIDEB @@ -1010,11 +1011,11 @@ INIT_MMX PREDICT_8x8_VR mmxext, b, q , 8 %endif -%ifndef ARCH_X86_64 -INIT_MMX ;----------------------------------------------------------------------------- ; void predict_8x8c_p_core( uint8_t *src, int i00, int b, int c ) ;----------------------------------------------------------------------------- +%ifndef ARCH_X86_64 +INIT_MMX cglobal predict_8x8c_p_core_mmxext, 1,2 LOAD_PLANE_ARGS movq mm1, mm2 @@ -1039,10 +1040,66 @@ ALIGN 4 dec r1d jg .loop REP_RET +%endif ; !ARCH_X86_64 + +INIT_XMM +cglobal predict_8x8c_p_core_sse2, 1,1 + movd m0, r1m + movd m2, r2m + movd m4, r3m +%ifdef HIGH_BIT_DEPTH + mova m3, [pw_pixel_max] + pxor m1, m1 +%endif + SPLATW m0, m0, 0 + SPLATW m2, m2, 0 + SPLATW m4, m4, 0 + pmullw m2, [pw_76543210] +%ifdef HIGH_BIT_DEPTH + mov r1d, 8 +.loop: + mova m5, m0 + paddsw m5, m2 + psraw m5, 5 + CLIPW m5, m1, m3 + mova [r0], m5 + paddw m2, m4 + add r0, FDEC_STRIDEB + dec r1d + jg .loop +%else ;!HIGH_BIT_DEPTH + paddsw m0, m2 ; m0 = {i+0*b, i+1*b, i+2*b, i+3*b, i+4*b, i+5*b, i+6*b, i+7*b} + mova m3, m0 + paddsw m3, m4 + paddsw m4, m4 +call .loop + add r0, FDEC_STRIDE*4 +.loop: + mova m5, m0 + mova m1, m3 + psraw m0, 5 + psraw m3, 5 + packuswb m0, m3 + movq [r0+FDEC_STRIDE*0], m0 + movhps [r0+FDEC_STRIDE*1], m0 + paddsw m5, m4 + paddsw m1, m4 + mova m0, m5 + mova m3, m1 + psraw m5, 5 + psraw m1, 5 + packuswb m5, m1 + movq [r0+FDEC_STRIDE*2], m5 + movhps [r0+FDEC_STRIDE*3], m5 + paddsw m0, m4 + paddsw m3, m4 +%endif ;!HIGH_BIT_DEPTH + RET ;----------------------------------------------------------------------------- ; void predict_16x16_p_core( uint8_t *src, int i00, int b, int c ) ;----------------------------------------------------------------------------- +%ifndef ARCH_X86_64 cglobal predict_16x16_p_core_mmxext, 1,2 LOAD_PLANE_ARGS movq mm5, mm2 @@ -1081,9 +1138,73 @@ ALIGN 4 dec r1d jg .loop REP_RET - %endif ; !ARCH_X86_64 +INIT_XMM +cglobal predict_16x16_p_core_sse2, 1,2,8 + movd m0, r1m + movd m1, r2m + movd m2, r3m +%ifdef HIGH_BIT_DEPTH + pxor m6, m6 + pxor m7, m7 +%endif + SPLATW m0, m0, 0 + SPLATW m1, m1, 0 + SPLATW m2, m2, 0 + mova m3, m1 + pmullw m3, [pw_76543210] + psllw m1, 3 +%ifdef HIGH_BIT_DEPTH + mov r1d, 16 +.loop: + mova m4, m0 + mova m5, m0 + mova m7, m3 + paddsw m7, m6 + paddsw m4, m7 + paddsw m7, m1 + paddsw m5, m7 + psraw m4, 5 + psraw m5, 5 + CLIPW m4, [pb_0], [pw_pixel_max] + CLIPW m5, [pb_0], [pw_pixel_max] + mova [r0], m4 + mova [r0+16], m5 + add r0, FDEC_STRIDEB + paddw m6, m2 + dec r1d + jg .loop +%else ;!HIGH_BIT_DEPTH + paddsw m0, m3 ; m0 = {i+ 0*b, i+ 1*b, i+ 2*b, i+ 3*b, i+ 4*b, i+ 5*b, i+ 6*b, i+ 7*b} + paddsw m1, m0 ; m1 = {i+ 8*b, i+ 9*b, i+10*b, i+11*b, i+12*b, i+13*b, i+14*b, i+15*b} + mova m7, m2 + paddsw m7, m7 + mov r1d, 8 +ALIGN 4 +.loop: + mova m3, m0 + mova m4, m1 + mova m5, m0 + mova m6, m1 + psraw m3, 5 + psraw m4, 5 + paddsw m5, m2 + paddsw m6, m2 + psraw m5, 5 + psraw m6, 5 + packuswb m3, m4 + packuswb m5, m6 + mova [r0+FDEC_STRIDE*0], m3 + mova [r0+FDEC_STRIDE*1], m5 + paddsw m0, m7 + paddsw m1, m7 + add r0, FDEC_STRIDE*2 + dec r1d + jg .loop +%endif ;!HIGH_BIT_DEPTH + REP_RET + INIT_XMM ;----------------------------------------------------------------------------- ; void predict_8x8_ddl( uint8_t *src, uint8_t *edge ) @@ -1574,91 +1695,6 @@ cglobal predict_8x8c_dc_top_mmxext, 1,1 RET %endif -;----------------------------------------------------------------------------- -; void predict_8x8c_p_core( uint8_t *src, int i00, int b, int c ) -;----------------------------------------------------------------------------- - -cglobal predict_8x8c_p_core_sse2, 1,1 - movd xmm0, r1m - movd xmm2, r2m - movd xmm4, r3m - pshuflw xmm0, xmm0, 0 - pshuflw xmm2, xmm2, 0 - pshuflw xmm4, xmm4, 0 - punpcklqdq xmm0, xmm0 - punpcklqdq xmm2, xmm2 - punpcklqdq xmm4, xmm4 - pmullw xmm2, [pw_76543210] - paddsw xmm0, xmm2 ; xmm0 = {i+0*b, i+1*b, i+2*b, i+3*b, i+4*b, i+5*b, i+6*b, i+7*b} - movdqa xmm3, xmm0 - paddsw xmm3, xmm4 - paddsw xmm4, xmm4 -call .loop - add r0, FDEC_STRIDE*4 -.loop: - movdqa xmm5, xmm0 - movdqa xmm1, xmm3 - psraw xmm0, 5 - psraw xmm3, 5 - packuswb xmm0, xmm3 - movq [r0+FDEC_STRIDE*0], xmm0 - movhps [r0+FDEC_STRIDE*1], xmm0 - paddsw xmm5, xmm4 - paddsw xmm1, xmm4 - movdqa xmm0, xmm5 - movdqa xmm3, xmm1 - psraw xmm5, 5 - psraw xmm1, 5 - packuswb xmm5, xmm1 - movq [r0+FDEC_STRIDE*2], xmm5 - movhps [r0+FDEC_STRIDE*3], xmm5 - paddsw xmm0, xmm4 - paddsw xmm3, xmm4 - RET - -;----------------------------------------------------------------------------- -; void predict_16x16_p_core( uint8_t *src, int i00, int b, int c ) -;----------------------------------------------------------------------------- -cglobal predict_16x16_p_core_sse2, 1,2,8 - movd xmm0, r1m - movd xmm1, r2m - movd xmm2, r3m - pshuflw xmm0, xmm0, 0 - pshuflw xmm1, xmm1, 0 - pshuflw xmm2, xmm2, 0 - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - punpcklqdq xmm2, xmm2 - movdqa xmm3, xmm1 - pmullw xmm3, [pw_76543210] - psllw xmm1, 3 - paddsw xmm0, xmm3 ; xmm0 = {i+ 0*b, i+ 1*b, i+ 2*b, i+ 3*b, i+ 4*b, i+ 5*b, i+ 6*b, i+ 7*b} - paddsw xmm1, xmm0 ; xmm1 = {i+ 8*b, i+ 9*b, i+10*b, i+11*b, i+12*b, i+13*b, i+14*b, i+15*b} - movdqa xmm7, xmm2 - paddsw xmm7, xmm7 - mov r1d, 8 -ALIGN 4 -.loop: - movdqa xmm3, xmm0 - movdqa xmm4, xmm1 - movdqa xmm5, xmm0 - movdqa xmm6, xmm1 - psraw xmm3, 5 - psraw xmm4, 5 - paddsw xmm5, xmm2 - paddsw xmm6, xmm2 - psraw xmm5, 5 - psraw xmm6, 5 - packuswb xmm3, xmm4 - packuswb xmm5, xmm6 - movdqa [r0+FDEC_STRIDE*0], xmm3 - movdqa [r0+FDEC_STRIDE*1], xmm5 - paddsw xmm0, xmm7 - paddsw xmm1, xmm7 - add r0, FDEC_STRIDE*2 - dec r1d - jg .loop - REP_RET ;----------------------------------------------------------------------------- ; void predict_16x16_v( pixel *src ) diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c index 72b89a53..5de5598b 100644 --- a/common/x86/predict-c.c +++ b/common/x86/predict-c.c @@ -42,9 +42,9 @@ void x264_predict_16x16_dc_top_sse2( pixel *src ); void x264_predict_16x16_dc_top_ssse3( uint16_t *src ); void x264_predict_16x16_p_core_mmxext( uint8_t *src, int i00, int b, int c ); - void x264_predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c ); + void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c ); void x264_predict_8x8c_p_core_mmxext( uint8_t *src, int i00, int b, int c ); - void x264_predict_8x8c_p_core_sse2( uint8_t *src, int i00, int b, int c ); + void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c ); void x264_predict_8x8c_dc_mmxext( pixel *src ); void x264_predict_8x8c_dc_sse2( uint16_t *src ); void x264_predict_8x8c_dc_top_mmxext( uint8_t *src ); @@ -127,17 +127,20 @@ static void x264_predict_16x16_dc_left_##name( pixel *src )\ PREDICT_16x16_DC_LEFT( mmxext ) PREDICT_16x16_DC_LEFT( sse2 ) -#if !HIGH_BIT_DEPTH -ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8}; -ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1}; -ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4}; - #define PREDICT_P_SUM(j,i)\ H += i * ( src[j+i - FDEC_STRIDE ] - src[j-i - FDEC_STRIDE ] );\ V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );\ +ALIGNED_16( static const int16_t pw_12345678[8] ) = {1,2,3,4,5,6,7,8}; +ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1}; +ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4}; +ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8}; +ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1}; +ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4}; + +#if !HIGH_BIT_DEPTH #define PREDICT_16x16_P(name)\ -static void x264_predict_16x16_p_##name( uint8_t *src )\ +static void x264_predict_16x16_p_##name( pixel *src )\ {\ int a, b, c;\ int H = 0;\ @@ -157,17 +160,37 @@ static void x264_predict_16x16_p_##name( uint8_t *src )\ i00 = a - b * 7 - c * 7 + 16;\ x264_predict_16x16_p_core_##name( src, i00, b, c );\ } - #ifndef ARCH_X86_64 PREDICT_16x16_P( mmxext ) #endif PREDICT_16x16_P( sse2 ) +#endif //!HIGH_BIT_DEPTH #ifdef __GNUC__ +#if HIGH_BIT_DEPTH +static void x264_predict_16x16_p_sse2( uint16_t *src ) +#else static void x264_predict_16x16_p_ssse3( uint8_t *src ) +#endif { int a, b, c, i00; int H, V; +#if HIGH_BIT_DEPTH + asm ( + "movdqu -2+%1, %%xmm1 \n" + "movdqa 16+%1, %%xmm0 \n" + "pmaddwd %2, %%xmm0 \n" + "pmaddwd %3, %%xmm1 \n" + "paddd %%xmm1, %%xmm0 \n" + "movhlps %%xmm0, %%xmm1 \n" + "paddd %%xmm1, %%xmm0 \n" + "pshuflw $14, %%xmm0, %%xmm1 \n" + "paddd %%xmm1, %%xmm0 \n" + "movd %%xmm0, %0 \n" + :"=r"(H) + :"m"(src[-FDEC_STRIDE]), "m"(*pw_12345678), "m"(*pw_m87654321) + ); +#else asm ( "movq %1, %%mm1 \n" "movq 8+%1, %%mm0 \n" @@ -184,6 +207,7 @@ static void x264_predict_16x16_p_ssse3( uint8_t *src ) :"=r"(H) :"m"(src[-FDEC_STRIDE]), "m"(*pb_12345678), "m"(*pb_m87654321) ); +#endif V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] ) + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] ) + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] ) @@ -196,10 +220,17 @@ static void x264_predict_16x16_p_ssse3( uint8_t *src ) b = ( 5 * H + 32 ) >> 6; c = ( 5 * V + 32 ) >> 6; i00 = a - b * 7 - c * 7 + 16; - x264_predict_16x16_p_core_sse2( src, i00, b, c ); + /* b*15 + c*15 can overflow: it's easier to just branch away in this rare case + * than to try to consider it in the asm. */ + if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 1092 || abs(c) > 1092) ) + x264_predict_16x16_p_c( src ); + else + x264_predict_16x16_p_core_sse2( src, i00, b, c ); } #endif +#if !HIGH_BIT_DEPTH + #define PREDICT_8x8_P(name)\ static void x264_predict_8x8c_p_##name( uint8_t *src )\ {\ @@ -217,17 +248,35 @@ static void x264_predict_8x8c_p_##name( uint8_t *src )\ i00 = a -3*b -3*c + 16;\ x264_predict_8x8c_p_core_##name( src, i00, b, c );\ } - #ifndef ARCH_X86_64 PREDICT_8x8_P( mmxext ) #endif PREDICT_8x8_P( sse2 ) +#endif //!HIGH_BIT_DEPTH + #ifdef __GNUC__ +#if HIGH_BIT_DEPTH +static void x264_predict_8x8c_p_sse2( uint16_t *src ) +#else static void x264_predict_8x8c_p_ssse3( uint8_t *src ) +#endif { int a, b, c, i00; int H, V; +#if HIGH_BIT_DEPTH + asm ( + "movdqa %1, %%xmm0 \n" + "pmaddwd %2, %%xmm0 \n" + "movhlps %%xmm0, %%xmm1 \n" + "paddd %%xmm1, %%xmm0 \n" + "pshuflw $14, %%xmm0, %%xmm1 \n" + "paddd %%xmm1, %%xmm0 \n" + "movd %%xmm0, %0 \n" + :"=r"(H) + :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234) + ); +#else asm ( "movq %1, %%mm0 \n" "pmaddubsw %2, %%mm0 \n" @@ -240,6 +289,7 @@ static void x264_predict_8x8c_p_ssse3( uint8_t *src ) :"=r"(H) :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234) ); +#endif V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] ) + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] ) + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] ) @@ -249,10 +299,15 @@ static void x264_predict_8x8c_p_ssse3( uint8_t *src ) b = ( 17 * H + 16 ) >> 5; c = ( 17 * V + 16 ) >> 5; i00 = a -3*b -3*c + 16; - x264_predict_8x8c_p_core_sse2( src, i00, b, c ); + /* b*7 + c*7 can overflow: it's easier to just branch away in this rare case + * than to try to consider it in the asm. */ + if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 2340 || abs(c) > 2340) ) + x264_predict_8x8c_p_c( src ); + else + x264_predict_8x8c_p_core_sse2( src, i00, b, c ); } #endif - +#if !HIGH_BIT_DEPTH #if ARCH_X86_64 static void x264_predict_8x8c_dc_left( uint8_t *src ) { @@ -360,6 +415,7 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] ) pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2; pf[I_PRED_16x16_V] = x264_predict_16x16_v_sse2; pf[I_PRED_16x16_H] = x264_predict_16x16_h_sse2; + pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2; #else #if !ARCH_X86_64 pf[I_PRED_16x16_P] = x264_predict_16x16_p_mmxext; @@ -397,6 +453,7 @@ void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] ) pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_sse2; pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_sse2; pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_sse2; + pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2; #else #if ARCH_X86_64 pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left; diff --git a/tools/checkasm.c b/tools/checkasm.c index a0e74c75..c552ab9b 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -1694,6 +1694,7 @@ static int check_intra( int cpu_ref, int cpu_new ) int ret = 0, ok = 1, used_asm = 0; ALIGNED_16( pixel edge[33] ); ALIGNED_16( pixel edge2[33] ); + ALIGNED_16( pixel fdec[FDEC_STRIDE*20] ); struct { x264_predict_t predict_16x16[4+3]; @@ -1718,18 +1719,20 @@ static int check_intra( int cpu_ref, int cpu_new ) x264_predict_8x8_init( cpu_new, ip_a.predict_8x8, &ip_a.predict_8x8_filter ); x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 ); - ip_c.predict_8x8_filter( pbuf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS ); + memcpy( fdec, pbuf1, 32*20 * sizeof(pixel) );\ -#define INTRA_TEST( name, dir, w, ... )\ + ip_c.predict_8x8_filter( fdec+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS ); + +#define INTRA_TEST( name, dir, w, bench, ... )\ if( ip_a.name[dir] != ip_ref.name[dir] )\ {\ set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\ used_asm = 1;\ - memcpy( pbuf3, pbuf1, 32*20 * sizeof(pixel) );\ - memcpy( pbuf4, pbuf1, 32*20 * sizeof(pixel) );\ - call_c( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\ - call_a( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\ - if( memcmp( pbuf3, pbuf4, 32*20 * sizeof(pixel) ) )\ + memcpy( pbuf3, fdec, FDEC_STRIDE*20 * sizeof(pixel) );\ + memcpy( pbuf4, fdec, FDEC_STRIDE*20 * sizeof(pixel) );\ + call_c##bench( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\ + call_a##bench( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\ + if( memcmp( pbuf3, pbuf4, FDEC_STRIDE*20 * sizeof(pixel) ) )\ {\ fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\ ok = 0;\ @@ -1740,7 +1743,7 @@ static int check_intra( int cpu_ref, int cpu_new ) {\ printf( "%2x ", edge[14-j] );\ for( int k = 0; k < w; k++ )\ - printf( "%2x ", pbuf4[48+k+j*32] );\ + printf( "%2x ", pbuf4[48+k+j*FDEC_STRIDE] );\ printf( "\n" );\ }\ printf( "\n" );\ @@ -1748,20 +1751,20 @@ static int check_intra( int cpu_ref, int cpu_new ) {\ printf( " " );\ for( int k = 0; k < w; k++ )\ - printf( "%2x ", pbuf3[48+k+j*32] );\ + printf( "%2x ", pbuf3[48+k+j*FDEC_STRIDE] );\ printf( "\n" );\ }\ }\ } for( int i = 0; i < 12; i++ ) - INTRA_TEST( predict_4x4, i, 4 ); + INTRA_TEST( predict_4x4, i, 4, ); for( int i = 0; i < 7; i++ ) - INTRA_TEST( predict_8x8c, i, 8 ); + INTRA_TEST( predict_8x8c, i, 8, ); for( int i = 0; i < 7; i++ ) - INTRA_TEST( predict_16x16, i, 16 ); + INTRA_TEST( predict_16x16, i, 16, ); for( int i = 0; i < 12; i++ ) - INTRA_TEST( predict_8x8, i, 8, edge ); + INTRA_TEST( predict_8x8, i, 8, , edge ); set_func_name("intra_predict_8x8_filter"); if( ip_a.predict_8x8_filter != ip_ref.predict_8x8_filter ) @@ -1780,6 +1783,32 @@ static int check_intra( int cpu_ref, int cpu_new ) } } +#define EXTREMAL_PLANE(size) \ + { \ + int max[7]; \ + for( int j = 0; j < 7; j++ ) \ + max[j] = test ? rand()&PIXEL_MAX : PIXEL_MAX; \ + fdec[48-1-FDEC_STRIDE] = (i&1)*max[0]; \ + for( int j = 0; j < size/2; j++ ) \ + fdec[48+j-FDEC_STRIDE] = (!!(i&2))*max[1]; \ + for( int j = size/2; j < size-1; j++ ) \ + fdec[48+j-FDEC_STRIDE] = (!!(i&4))*max[2]; \ + fdec[48+(size-1)-FDEC_STRIDE] = (!!(i&8))*max[3]; \ + for( int j = 0; j < size/2; j++ ) \ + fdec[48+j*FDEC_STRIDE-1] = (!!(i&16))*max[4]; \ + for( int j = size/2; j < size-1; j++ ) \ + fdec[48+j*FDEC_STRIDE-1] = (!!(i&32))*max[5]; \ + fdec[48+(size-1)*FDEC_STRIDE-1] = (!!(i&64))*max[6]; \ + } + /* Extremal test case for planar prediction. */ + for( int test = 0; test < 100 && ok; test++ ) + for( int i = 0; i < 128 && ok; i++ ) + { + EXTREMAL_PLANE( 8 ); + INTRA_TEST( predict_8x8c, I_PRED_CHROMA_P, 8, 1 ); + EXTREMAL_PLANE( 16 ); + INTRA_TEST( predict_16x16, I_PRED_16x16_P, 16, 1 ); + } report( "intra pred :" ); return ret; }