From: Loren Merritt Date: Sun, 13 Mar 2005 07:04:16 +0000 (+0000) Subject: SSD comparison function (not yet used). X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=73522c84014f240abe7ee70c6e98657b08f97b44;p=libx264 SSD comparison function (not yet used). Cosmetics in mmx SAD. git-svn-id: svn://svn.videolan.org/x264/trunk@165 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/common/i386/pixel-a.asm b/common/i386/pixel-a.asm index 14015741..9760173f 100644 --- a/common/i386/pixel-a.asm +++ b/common/i386/pixel-a.asm @@ -91,6 +91,102 @@ BITS 32 lea ecx, [ecx+2*edx] %endmacro +%macro SSD_INC_1x16P 0 + movq mm1, [eax] + movq mm2, [ecx] + movq mm3, [eax+8] + movq mm4, [ecx+8] + + movq mm5, mm2 + movq mm6, mm4 + psubusb mm2, mm1 + psubusb mm4, mm3 + psubusb mm1, mm5 + psubusb mm3, mm6 + por mm1, mm2 + por mm3, mm4 + + movq mm2, mm1 + movq mm4, mm3 + punpcklbw mm1, mm7 + punpcklbw mm3, mm7 + punpckhbw mm2, mm7 + punpckhbw mm4, mm7 + pmaddwd mm1, mm1 + pmaddwd mm2, mm2 + pmaddwd mm3, mm3 + pmaddwd mm4, mm4 + + add eax, ebx + add ecx, edx + paddd mm0, mm1 + paddd mm0, mm2 + paddd mm0, mm3 + paddd mm0, mm4 +%endmacro + +%macro SSD_INC_1x8P 0 + movq mm1, [eax] + movq mm2, [ecx] + + movq mm5, mm2 + psubusb mm2, mm1 + psubusb mm1, mm5 + por mm1, mm2 ; mm1 = 8bit abs diff + + movq mm2, mm1 + punpcklbw mm1, mm7 + punpckhbw mm2, mm7 ; (mm1,mm2) = 16bit abs diff + pmaddwd mm1, mm1 + pmaddwd mm2, mm2 + + add eax, ebx + add ecx, edx + paddd mm0, mm1 + paddd mm0, mm2 +%endmacro + +%macro SSD_INC_1x4P 0 + movd mm1, [eax] + movd mm2, [ecx] + + movq mm5, mm2 + psubusb mm2, mm1 + psubusb mm1, mm5 + por mm1, mm2 + punpcklbw mm1, mm7 + pmaddwd mm1, mm1 + + add eax, ebx + add ecx, edx + paddd mm0, mm1 +%endmacro + +%macro SSD_INC_8x16P 0 + SSD_INC_1x16P + SSD_INC_1x16P + SSD_INC_1x16P + SSD_INC_1x16P + SSD_INC_1x16P + SSD_INC_1x16P + SSD_INC_1x16P + SSD_INC_1x16P +%endmacro + +%macro SSD_INC_4x8P 0 + SSD_INC_1x8P + SSD_INC_1x8P + SSD_INC_1x8P + SSD_INC_1x8P +%endmacro + +%macro SSD_INC_4x4P 0 + SSD_INC_1x4P + SSD_INC_1x4P + SSD_INC_1x4P + SSD_INC_1x4P +%endmacro + %macro LOAD_DIFF_4P 5 ; MMP, MMT, MMZ, [pix1], [pix2] movd %1, %4 punpcklbw %1, %3 @@ -203,6 +299,14 @@ cglobal x264_pixel_sad_8x4_mmxext cglobal x264_pixel_sad_4x8_mmxext cglobal x264_pixel_sad_4x4_mmxext +cglobal x264_pixel_ssd_16x16_mmxext +cglobal x264_pixel_ssd_16x8_mmxext +cglobal x264_pixel_ssd_8x16_mmxext +cglobal x264_pixel_ssd_8x8_mmxext +cglobal x264_pixel_ssd_8x4_mmxext +cglobal x264_pixel_ssd_4x8_mmxext +cglobal x264_pixel_ssd_4x4_mmxext + cglobal x264_pixel_satd_4x4_mmxext cglobal x264_pixel_satd_4x8_mmxext cglobal x264_pixel_satd_8x4_mmxext @@ -211,11 +315,7 @@ cglobal x264_pixel_satd_16x8_mmxext cglobal x264_pixel_satd_8x16_mmxext cglobal x264_pixel_satd_16x16_mmxext -ALIGN 16 -;----------------------------------------------------------------------------- -; int __cdecl x264_pixel_sad_16x16_mmxext (uint8_t *, int, uint8_t *, int ) -;----------------------------------------------------------------------------- -x264_pixel_sad_16x16_mmxext: +%macro SAD_START 0 push ebx mov eax, [esp+ 8] ; pix1 @@ -224,153 +324,105 @@ x264_pixel_sad_16x16_mmxext: mov edx, [esp+20] ; stride2 pxor mm0, mm0 +%endmacro +%macro SAD_END 0 + movd eax, mm0 + pop ebx + ret +%endmacro + +ALIGN 16 +;----------------------------------------------------------------------------- +; int __cdecl x264_pixel_sad_16x16_mmxext (uint8_t *, int, uint8_t *, int ) +;----------------------------------------------------------------------------- +x264_pixel_sad_16x16_mmxext: + SAD_START SAD_INC_2x16P SAD_INC_2x16P SAD_INC_2x16P SAD_INC_2x16P - SAD_INC_2x16P SAD_INC_2x16P SAD_INC_2x16P SAD_INC_2x16P - - movd eax, mm0 - - pop ebx - ret + SAD_END ALIGN 16 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_sad_16x8_mmxext (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- x264_pixel_sad_16x8_mmxext: - push ebx - - mov eax, [esp+ 8] ; pix1 - mov ebx, [esp+12] ; stride1 - mov ecx, [esp+16] ; pix2 - mov edx, [esp+20] ; stride2 - - pxor mm0, mm0 - + SAD_START SAD_INC_2x16P SAD_INC_2x16P SAD_INC_2x16P SAD_INC_2x16P - - movd eax, mm0 - - pop ebx - ret - + SAD_END ALIGN 16 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_sad_8x16_mmxext (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- x264_pixel_sad_8x16_mmxext: - push ebx - - mov eax, [esp+ 8] ; pix1 - mov ebx, [esp+12] ; stride1 - mov ecx, [esp+16] ; pix2 - mov edx, [esp+20] ; stride2 - - pxor mm0, mm0 - + SAD_START SAD_INC_2x8P SAD_INC_2x8P SAD_INC_2x8P SAD_INC_2x8P - SAD_INC_2x8P SAD_INC_2x8P SAD_INC_2x8P SAD_INC_2x8P - - movd eax, mm0 - - pop ebx - ret + SAD_END ALIGN 16 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_sad_8x8_mmxext (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- x264_pixel_sad_8x8_mmxext: - push ebx - - mov eax, [esp+ 8] ; pix1 - mov ebx, [esp+12] ; stride1 - mov ecx, [esp+16] ; pix2 - mov edx, [esp+20] ; stride2 - - pxor mm0, mm0 - + SAD_START SAD_INC_2x8P SAD_INC_2x8P SAD_INC_2x8P SAD_INC_2x8P - - movd eax, mm0 - - pop ebx - ret + SAD_END ALIGN 16 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_sad_8x4_mmxext (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- x264_pixel_sad_8x4_mmxext: - push ebx - - mov eax, [esp+ 8] ; pix1 - mov ebx, [esp+12] ; stride1 - mov ecx, [esp+16] ; pix2 - mov edx, [esp+20] ; stride2 - - pxor mm0, mm0 - + SAD_START SAD_INC_2x8P SAD_INC_2x8P - - movd eax, mm0 - - pop ebx - ret - + SAD_END ALIGN 16 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_sad_4x8_mmxext (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- x264_pixel_sad_4x8_mmxext: - push ebx - - mov eax, [esp+ 8] ; pix1 - mov ebx, [esp+12] ; stride1 - mov ecx, [esp+16] ; pix2 - mov edx, [esp+20] ; stride2 - - pxor mm0, mm0 - + SAD_START SAD_INC_2x4P SAD_INC_2x4P - SAD_INC_2x4P SAD_INC_2x4P - - movd eax, mm0 - - pop ebx - ret + SAD_END ALIGN 16 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_sad_4x4_mmxext (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- x264_pixel_sad_4x4_mmxext: + SAD_START + SAD_INC_2x4P + SAD_INC_2x4P + SAD_END + + + +%macro SSD_START 0 push ebx mov eax, [esp+ 8] ; pix1 @@ -378,15 +430,72 @@ x264_pixel_sad_4x4_mmxext: mov ecx, [esp+16] ; pix2 mov edx, [esp+20] ; stride2 - pxor mm0, mm0 + pxor mm7, mm7 ; zero + pxor mm0, mm0 ; mm0 holds the sum +%endmacro - SAD_INC_2x4P - SAD_INC_2x4P - - movd eax, mm0 +%macro SSD_END 0 + movq mm1, mm0 + psrlq mm1, 32 + paddd mm0, mm1 + movd eax, mm0 pop ebx ret +%endmacro + +ALIGN 16 +;----------------------------------------------------------------------------- +; int __cdecl x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int ) +;----------------------------------------------------------------------------- +x264_pixel_ssd_16x16_mmxext: + SSD_START + SSD_INC_8x16P + SSD_INC_8x16P + SSD_END + +ALIGN 16 +x264_pixel_ssd_16x8_mmxext: + SSD_START + SSD_INC_8x16P + SSD_END + +ALIGN 16 +x264_pixel_ssd_8x16_mmxext: + SSD_START + SSD_INC_4x8P + SSD_INC_4x8P + SSD_INC_4x8P + SSD_INC_4x8P + SSD_END + +ALIGN 16 +x264_pixel_ssd_8x8_mmxext: + SSD_START + SSD_INC_4x8P + SSD_INC_4x8P + SSD_END + +ALIGN 16 +x264_pixel_ssd_8x4_mmxext: + SSD_START + SSD_INC_4x8P + SSD_END + +ALIGN 16 +x264_pixel_ssd_4x8_mmxext: + SSD_START + SSD_INC_4x4P + SSD_INC_4x4P + SSD_END + +ALIGN 16 +x264_pixel_ssd_4x4_mmxext: + SSD_START + SSD_INC_4x4P + SSD_END + + ALIGN 16 ;----------------------------------------------------------------------------- @@ -402,7 +511,6 @@ x264_pixel_satd_4x4_mmxext: pxor mm7, mm7 - LOAD_DIFF_4P mm0, mm6, mm7, [eax], [ecx] LOAD_DIFF_4P mm1, mm6, mm7, [eax+ebx], [ecx+edx] LOAD_DIFF_4P mm2, mm6, mm7, [eax+2*ebx], [ecx+2*edx] @@ -416,8 +524,6 @@ x264_pixel_satd_4x4_mmxext: pop ebx ret - - ALIGN 16 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_satd_4x8_mmxext (uint8_t *, int, uint8_t *, int ) diff --git a/common/i386/pixel.h b/common/i386/pixel.h index 799cbfde..43916c0a 100644 --- a/common/i386/pixel.h +++ b/common/i386/pixel.h @@ -32,6 +32,14 @@ int x264_pixel_sad_8x4_mmxext( uint8_t *, int, uint8_t *, int ); int x264_pixel_sad_4x8_mmxext( uint8_t *, int, uint8_t *, int ); int x264_pixel_sad_4x4_mmxext( uint8_t *, int, uint8_t *, int ); +int x264_pixel_ssd_16x16_mmxext( uint8_t *, int, uint8_t *, int ); +int x264_pixel_ssd_16x8_mmxext( uint8_t *, int, uint8_t *, int ); +int x264_pixel_ssd_8x16_mmxext( uint8_t *, int, uint8_t *, int ); +int x264_pixel_ssd_8x8_mmxext( uint8_t *, int, uint8_t *, int ); +int x264_pixel_ssd_8x4_mmxext( uint8_t *, int, uint8_t *, int ); +int x264_pixel_ssd_4x8_mmxext( uint8_t *, int, uint8_t *, int ); +int x264_pixel_ssd_4x4_mmxext( uint8_t *, int, uint8_t *, int ); + int x264_pixel_satd_16x16_mmxext( uint8_t *, int, uint8_t *, int ); int x264_pixel_satd_16x8_mmxext( uint8_t *, int, uint8_t *, int ); int x264_pixel_satd_8x16_mmxext( uint8_t *, int, uint8_t *, int ); diff --git a/common/pixel.c b/common/pixel.c index b1d7c8df..fa3fbac4 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -72,6 +72,38 @@ PIXEL_SAD_C( pixel_sad_8x4, 8, 4 ) PIXEL_SAD_C( pixel_sad_4x8, 4, 8 ) PIXEL_SAD_C( pixel_sad_4x4, 4, 4 ) + +/**************************************************************************** + * pixel_ssd_WxH + ****************************************************************************/ +#define PIXEL_SSD_C( name, lx, ly ) \ +static int name( uint8_t *pix1, int i_stride_pix1, \ + uint8_t *pix2, int i_stride_pix2 ) \ +{ \ + int i_sum = 0; \ + int x, y; \ + for( y = 0; y < ly; y++ ) \ + { \ + for( x = 0; x < lx; x++ ) \ + { \ + int d = pix1[x] - pix2[x]; \ + i_sum += d*d; \ + } \ + pix1 += i_stride_pix1; \ + pix2 += i_stride_pix2; \ + } \ + return i_sum; \ +} + +PIXEL_SSD_C( pixel_ssd_16x16, 16, 16 ) +PIXEL_SSD_C( pixel_ssd_16x8, 16, 8 ) +PIXEL_SSD_C( pixel_ssd_8x16, 8, 16 ) +PIXEL_SSD_C( pixel_ssd_8x8, 8, 8 ) +PIXEL_SSD_C( pixel_ssd_8x4, 8, 4 ) +PIXEL_SSD_C( pixel_ssd_4x8, 4, 8 ) +PIXEL_SSD_C( pixel_ssd_4x4, 4, 4 ) + + static void pixel_sub_4x4( int16_t diff[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ) { int y, x; @@ -243,6 +275,14 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) pixf->sad[PIXEL_4x8] = pixel_sad_4x8; pixf->sad[PIXEL_4x4] = pixel_sad_4x4; + pixf->ssd[PIXEL_16x16] = pixel_ssd_16x16; + pixf->ssd[PIXEL_16x8] = pixel_ssd_16x8; + pixf->ssd[PIXEL_8x16] = pixel_ssd_8x16; + pixf->ssd[PIXEL_8x8] = pixel_ssd_8x8; + pixf->ssd[PIXEL_8x4] = pixel_ssd_8x4; + pixf->ssd[PIXEL_4x8] = pixel_ssd_4x8; + pixf->ssd[PIXEL_4x4] = pixel_ssd_4x4; + pixf->satd[PIXEL_16x16]= pixel_satd_16x16; pixf->satd[PIXEL_16x8] = pixel_satd_16x8; pixf->satd[PIXEL_8x16] = pixel_satd_8x16; @@ -284,6 +324,14 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) pixf->sad[PIXEL_4x8 ] = x264_pixel_sad_4x8_mmxext; pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_mmxext; + pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_mmxext; + pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_mmxext; + pixf->ssd[PIXEL_8x16] = x264_pixel_ssd_8x16_mmxext; + pixf->ssd[PIXEL_8x8] = x264_pixel_ssd_8x8_mmxext; + pixf->ssd[PIXEL_8x4] = x264_pixel_ssd_8x4_mmxext; + pixf->ssd[PIXEL_4x8] = x264_pixel_ssd_4x8_mmxext; + pixf->ssd[PIXEL_4x4] = x264_pixel_ssd_4x4_mmxext; + pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_mmxext; pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_mmxext; pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_mmxext; diff --git a/common/pixel.h b/common/pixel.h index 63f60fcf..20166969 100644 --- a/common/pixel.h +++ b/common/pixel.h @@ -25,6 +25,7 @@ #define _PIXEL_H 1 typedef int (*x264_pixel_sad_t) ( uint8_t *, int, uint8_t *, int ); +typedef int (*x264_pixel_ssd_t) ( uint8_t *, int, uint8_t *, int ); typedef int (*x264_pixel_satd_t)( uint8_t *, int, uint8_t *, int ); typedef void (*x264_pixel_avg_t) ( uint8_t *, int, uint8_t *, int ); typedef void (*x264_pixel_avg_weight_t) ( uint8_t *, int, uint8_t *, int, int ); @@ -65,6 +66,7 @@ static const int x264_size2pixel[5][5] = { typedef struct { x264_pixel_sad_t sad[7]; + x264_pixel_ssd_t ssd[7]; x264_pixel_satd_t satd[7]; x264_pixel_avg_t avg[10]; x264_pixel_avg_weight_t avg_weight[10]; diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 3f757ed1..d4525491 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -45,6 +45,7 @@ #endif #ifdef _MSC_VER #define exp2f(x) pow( 2, (x) ) +#define sqrtf sqrt #endif #ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't. #define rename(src,dst) (unlink(dst), rename(src,dst)) diff --git a/x264.c b/x264.c index 9bd20a96..762250ed 100644 --- a/x264.c +++ b/x264.c @@ -770,6 +770,7 @@ static int Encode( x264_param_t *param, hnd_t hin, FILE *fout ) /* Do not force any parameters */ pic.i_type = X264_TYPE_AUTO; + pic.i_qpplus1 = 0; if( x264_encoder_encode( h, &nal, &i_nal, &pic, &pic ) < 0 ) { fprintf( stderr, "x264_encoder_encode failed\n" );