SECTION .text
-cglobal x264_dct4x4dc_mmxext
+cglobal x264_dct4x4dc_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void dct4x4dc( int16_t d[4][4] )
+; void x264_dct4x4dc_mmx( int16_t d[4][4] )
;-----------------------------------------------------------------------------
-x264_dct4x4dc_mmxext:
+x264_dct4x4dc_mmx:
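+ ; parm1q (an alias from amd64inc.asm) names the first integer-argument
+ ; register, hiding the SysV vs Win64 calling-convention difference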
movq mm0, [parm1q+ 0]
movq mm1, [parm1q+ 8]
movq mm2, [parm1q+16]
movq [parm1q+24], mm4
ret
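For context, dct4x4dc and idct4x4dc are the 4x4 DC Hadamard transforms; nothing in them needs more than plain MMX, hence the rename. A rough portable-C restatement of the forward transform, modeled on the reference dct4x4dc in common/dct.c (a sketch, not a quote of the asm):

#include <stdint.h>

/* rounded 2-D Hadamard of the 4x4 luma DC block */
static void dct4x4dc_c( int16_t d[4][4] )
{
    int16_t tmp[4][4];
    int s01, s23, d01, d23;
    int i;

    for( i = 0; i < 4; i++ )
    {
        s01 = d[i][0] + d[i][1];
        d01 = d[i][0] - d[i][1];
        s23 = d[i][2] + d[i][3];
        d23 = d[i][2] - d[i][3];

        tmp[0][i] = s01 + s23;   /* 1-D butterflies, stored transposed */
        tmp[1][i] = s01 - s23;
        tmp[2][i] = d01 - d23;
        tmp[3][i] = d01 + d23;
    }
    for( i = 0; i < 4; i++ )
    {
        s01 = tmp[i][0] + tmp[i][1];
        d01 = tmp[i][0] - tmp[i][1];
        s23 = tmp[i][2] + tmp[i][3];
        d23 = tmp[i][2] - tmp[i][3];

        d[i][0] = ( s01 + s23 + 1 ) >> 1;   /* rounded halving */
        d[i][1] = ( s01 - s23 + 1 ) >> 1;
        d[i][2] = ( d01 - d23 + 1 ) >> 1;
        d[i][3] = ( d01 + d23 + 1 ) >> 1;
    }
}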
-cglobal x264_idct4x4dc_mmxext
+cglobal x264_idct4x4dc_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_idct4x4dc_mmxext( int16_t d[4][4] )
+; void x264_idct4x4dc_mmx( int16_t d[4][4] )
;-----------------------------------------------------------------------------
-x264_idct4x4dc_mmxext:
+x264_idct4x4dc_mmx:
movq mm0, [parm1q+ 0]
movq mm1, [parm1q+ 8]
movq mm2, [parm1q+16]
movq [parm1q+24], mm4
ret
-cglobal x264_sub4x4_dct_mmxext
+cglobal x264_sub4x4_dct_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+; void x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
;-----------------------------------------------------------------------------
-x264_sub4x4_dct_mmxext:
+x264_sub4x4_dct_mmx:
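+ ; prologue macros (see amd64inc.asm): push rbx and, on Win64 targets,
+ ; record it in the SEH unwind info; endprolog marks the prologue's end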
firstpush rbx
pushreg rbx
endprolog
ret
endfunc
-cglobal x264_add4x4_idct_mmxext
+cglobal x264_add4x4_idct_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
+; void x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
;-----------------------------------------------------------------------------
-x264_add4x4_idct_mmxext:
+x264_add4x4_idct_mmx:
; Load dct coeffs
movq mm0, [parm3q+ 0] ; dct
movq mm1, [parm3q+ 8]
cglobal x264_pixel_avg_weight_w8_mmxext
cglobal x264_pixel_avg_weight_w16_mmxext
-cglobal x264_mc_copy_w4_mmxext
-cglobal x264_mc_copy_w8_mmxext
-cglobal x264_mc_copy_w16_mmxext
+cglobal x264_mc_copy_w4_mmx
+cglobal x264_mc_copy_w8_mmx
+cglobal x264_mc_copy_w16_mmx
cglobal x264_mc_copy_w16_sse2
cglobal x264_mc_chroma_mmxext
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_mc_copy_w4_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src, int i_src_stride, int i_height )
+; void x264_mc_copy_w4_mmx( uint8_t *dst, int i_dst_stride,
+; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
-x264_mc_copy_w4_mmxext:
+x264_mc_copy_w4_mmx:
mov eax, parm5d ; i_height
ALIGN 4
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_mc_copy_w8_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src, int i_src_stride, int i_height )
+; void x264_mc_copy_w8_mmx( uint8_t *dst, int i_dst_stride,
+; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
-x264_mc_copy_w8_mmxext:
+x264_mc_copy_w8_mmx:
mov eax, parm5d ; i_height
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_mc_copy_w16_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src, int i_src_stride, int i_height )
+; void x264_mc_copy_w16_mmx( uint8_t *dst, int i_dst_stride,
+; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
-x264_mc_copy_w16_mmxext:
+x264_mc_copy_w16_mmx:
mov eax, parm5d ; i_height
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
jnz .loopcx2
add r10, r11 ; dst2 += dst2_stride
-
dec r15 ; height
- test r15, r15
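+ ; no separate test needed: dec already sets ZF for the jnz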
jnz .loopcy
lea rsp, [rbp]
loophy:
- dec rcx
xor rax, rax
loophx:
add rdx, r11 ; src_pitch
add r9, r10 ; dst_pitch
- test rcx, rcx
+ dec rcx
jnz loophy
ret
cglobal x264_pixel_sad_pde_16x8_mmxext
cglobal x264_pixel_sad_pde_8x16_mmxext
-cglobal x264_pixel_ssd_16x16_mmxext
-cglobal x264_pixel_ssd_16x8_mmxext
-cglobal x264_pixel_ssd_8x16_mmxext
-cglobal x264_pixel_ssd_8x8_mmxext
-cglobal x264_pixel_ssd_8x4_mmxext
-cglobal x264_pixel_ssd_4x8_mmxext
-cglobal x264_pixel_ssd_4x4_mmxext
+cglobal x264_pixel_ssd_16x16_mmx
+cglobal x264_pixel_ssd_16x8_mmx
+cglobal x264_pixel_ssd_8x16_mmx
+cglobal x264_pixel_ssd_8x8_mmx
+cglobal x264_pixel_ssd_8x4_mmx
+cglobal x264_pixel_ssd_4x8_mmx
+cglobal x264_pixel_ssd_4x4_mmx
cglobal x264_pixel_satd_4x4_mmxext
cglobal x264_pixel_satd_4x8_mmxext
ALIGN 16
;-----------------------------------------------------------------------------
-; int x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
+; int x264_pixel_ssd_16x16_mmx( uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
-x264_pixel_ssd_16x16_mmxext:
+x264_pixel_ssd_16x16_mmx:
SSD_START
SSD_INC_8x16P
SSD_INC_8x16P
SSD_END
ALIGN 16
-x264_pixel_ssd_16x8_mmxext:
+x264_pixel_ssd_16x8_mmx:
SSD_START
SSD_INC_8x16P
SSD_END
ALIGN 16
-x264_pixel_ssd_8x16_mmxext:
+x264_pixel_ssd_8x16_mmx:
SSD_START
SSD_INC_4x8P
SSD_INC_4x8P
SSD_END
ALIGN 16
-x264_pixel_ssd_8x8_mmxext:
+x264_pixel_ssd_8x8_mmx:
SSD_START
SSD_INC_4x8P
SSD_INC_4x8P
SSD_END
ALIGN 16
-x264_pixel_ssd_8x4_mmxext:
+x264_pixel_ssd_8x4_mmx:
SSD_START
SSD_INC_4x8P
SSD_END
ALIGN 16
-x264_pixel_ssd_4x8_mmxext:
+x264_pixel_ssd_4x8_mmx:
SSD_START
SSD_INC_4x4P
SSD_INC_4x4P
SSD_END
ALIGN 16
-x264_pixel_ssd_4x4_mmxext:
+x264_pixel_ssd_4x4_mmx:
SSD_START
SSD_INC_4x4P
SSD_END
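All seven variants expand the same SSD_START / SSD_INC_* / SSD_END macros, just a different number of times; the value computed is the plain sum of squared differences. A minimal C sketch (the helper name ssd_wxh is hypothetical):

#include <stdint.h>

/* sum of squared differences over a w x h block; the MMX bodies
 * above unroll this via SSD_INC_8x16P / SSD_INC_4x8P / SSD_INC_4x4P */
static int ssd_wxh( uint8_t *pix1, int i_pix1,
                    uint8_t *pix2, int i_pix2, int w, int h )
{
    int ssd = 0;
    int x, y;

    for( y = 0; y < h; y++ )
    {
        for( x = 0; x < w; x++ )
        {
            int d = pix1[x] - pix2[x];
            ssd += d * d;
        }
        pix1 += i_pix1;
        pix2 += i_pix2;
    }
    return ssd;
}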
dctf->idct2x2dc = dct2x2dc;
#ifdef HAVE_MMXEXT
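+ /* these renamed routines use no MMXEXT instructions, so the runtime
+ * check weakens to X264_CPU_MMX; HAVE_MMXEXT still gates the build */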
- if( cpu&X264_CPU_MMXEXT )
+ if( cpu&X264_CPU_MMX )
{
- dctf->sub4x4_dct = x264_sub4x4_dct_mmxext;
- dctf->sub8x8_dct = x264_sub8x8_dct_mmxext;
- dctf->sub16x16_dct = x264_sub16x16_dct_mmxext;
+ dctf->sub4x4_dct = x264_sub4x4_dct_mmx;
+ dctf->sub8x8_dct = x264_sub8x8_dct_mmx;
+ dctf->sub16x16_dct = x264_sub16x16_dct_mmx;
- dctf->add4x4_idct = x264_add4x4_idct_mmxext;
- dctf->add8x8_idct = x264_add8x8_idct_mmxext;
- dctf->add16x16_idct = x264_add16x16_idct_mmxext;
+ dctf->add4x4_idct = x264_add4x4_idct_mmx;
+ dctf->add8x8_idct = x264_add8x8_idct_mmx;
+ dctf->add16x16_idct = x264_add16x16_idct_mmx;
- dctf->dct4x4dc = x264_dct4x4dc_mmxext;
- dctf->idct4x4dc = x264_idct4x4dc_mmxext;
- }
+ dctf->dct4x4dc = x264_dct4x4dc_mmx;
+ dctf->idct4x4dc = x264_idct4x4dc_mmx;
#ifndef ARCH_X86_64
- if( cpu&X264_CPU_MMX )
- {
dctf->sub8x8_dct8 = x264_sub8x8_dct8_mmx;
dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
dctf->add16x16_idct8 = x264_add16x16_idct8_mmx;
- }
#endif
+ }
#endif
+
/* FIXME altivec dct is not transposed yet
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC )
SECTION .text
-cglobal x264_dct4x4dc_mmxext
+cglobal x264_dct4x4dc_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void __cdecl dct4x4dc( int16_t d[4][4] )
+; void __cdecl x264_dct4x4dc_mmx( int16_t d[4][4] )
;-----------------------------------------------------------------------------
-x264_dct4x4dc_mmxext:
+x264_dct4x4dc_mmx:
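+ ; cdecl: the lone argument d[4][4] is at [esp+4] on entry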
mov eax, [esp+ 4]
movq mm0, [eax+ 0]
movq mm1, [eax+ 8]
picpop ebx
ret
-cglobal x264_idct4x4dc_mmxext
+cglobal x264_idct4x4dc_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void __cdecl x264_idct4x4dc_mmxext( int16_t d[4][4] )
+; void __cdecl x264_idct4x4dc_mmx( int16_t d[4][4] )
;-----------------------------------------------------------------------------
-x264_idct4x4dc_mmxext:
+x264_idct4x4dc_mmx:
mov eax, [esp+ 4]
movq mm0, [eax+ 0]
movq mm1, [eax+ 8]
movq [eax+24], mm4
ret
-cglobal x264_sub4x4_dct_mmxext
+cglobal x264_sub4x4_dct_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void __cdecl x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+; void __cdecl x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
;-----------------------------------------------------------------------------
-x264_sub4x4_dct_mmxext:
+x264_sub4x4_dct_mmx:
push ebx
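+ ; after the push, the args start at [esp+8]: dct, pix1, i_pix1, pix2, i_pix2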
mov eax, [esp+12] ; pix1
mov ebx, [esp+16] ; i_pix1
pop ebx
ret
-cglobal x264_add4x4_idct_mmxext
+cglobal x264_add4x4_idct_mmx
ALIGN 16
;-----------------------------------------------------------------------------
-; void __cdecl x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
+; void __cdecl x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
;-----------------------------------------------------------------------------
-x264_add4x4_idct_mmxext:
+x264_add4x4_idct_mmx:
; Load dct coeffs
mov eax, [esp+12] ; dct
movq mm0, [eax+ 0]
#include "dct.h"
-void x264_sub8x8_dct_mmxext( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+void x264_sub8x8_dct_mmx( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
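+ /* the four 4x4 sub-blocks of the 8x8 area, in raster order:
+ * dct[0] top-left, dct[1] top-right, dct[2] bottom-left, dct[3] bottom-right */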
- x264_sub4x4_dct_mmxext( dct[0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
- x264_sub4x4_dct_mmxext( dct[1], &pix1[4], i_pix1, &pix2[4], i_pix2 );
- x264_sub4x4_dct_mmxext( dct[2], &pix1[4*i_pix1+0], i_pix1, &pix2[4*i_pix2+0], i_pix2 );
- x264_sub4x4_dct_mmxext( dct[3], &pix1[4*i_pix1+4], i_pix1, &pix2[4*i_pix2+4], i_pix2 );
+ x264_sub4x4_dct_mmx( dct[0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
+ x264_sub4x4_dct_mmx( dct[1], &pix1[4], i_pix1, &pix2[4], i_pix2 );
+ x264_sub4x4_dct_mmx( dct[2], &pix1[4*i_pix1+0], i_pix1, &pix2[4*i_pix2+0], i_pix2 );
+ x264_sub4x4_dct_mmx( dct[3], &pix1[4*i_pix1+4], i_pix1, &pix2[4*i_pix2+4], i_pix2 );
}
-void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+void x264_sub16x16_dct_mmx( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
- x264_sub8x8_dct_mmxext( &dct[ 0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
- x264_sub8x8_dct_mmxext( &dct[ 4], &pix1[8], i_pix1, &pix2[8], i_pix2 );
- x264_sub8x8_dct_mmxext( &dct[ 8], &pix1[8*i_pix1], i_pix1, &pix2[8*i_pix2], i_pix2 );
- x264_sub8x8_dct_mmxext( &dct[12], &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 );
+ x264_sub8x8_dct_mmx( &dct[ 0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
+ x264_sub8x8_dct_mmx( &dct[ 4], &pix1[8], i_pix1, &pix2[8], i_pix2 );
+ x264_sub8x8_dct_mmx( &dct[ 8], &pix1[8*i_pix1], i_pix1, &pix2[8*i_pix2], i_pix2 );
+ x264_sub8x8_dct_mmx( &dct[12], &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 );
}
* addXxX_idct:
****************************************************************************/
-void x264_add8x8_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] )
+void x264_add8x8_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] )
{
- x264_add4x4_idct_mmxext( p_dst, i_dst, dct[0] );
- x264_add4x4_idct_mmxext( &p_dst[4], i_dst, dct[1] );
- x264_add4x4_idct_mmxext( &p_dst[4*i_dst+0], i_dst, dct[2] );
- x264_add4x4_idct_mmxext( &p_dst[4*i_dst+4], i_dst, dct[3] );
+ x264_add4x4_idct_mmx( p_dst, i_dst, dct[0] );
+ x264_add4x4_idct_mmx( &p_dst[4], i_dst, dct[1] );
+ x264_add4x4_idct_mmx( &p_dst[4*i_dst+0], i_dst, dct[2] );
+ x264_add4x4_idct_mmx( &p_dst[4*i_dst+4], i_dst, dct[3] );
}
-void x264_add16x16_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
+void x264_add16x16_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
{
- x264_add8x8_idct_mmxext( &p_dst[0], i_dst, &dct[0] );
- x264_add8x8_idct_mmxext( &p_dst[8], i_dst, &dct[4] );
- x264_add8x8_idct_mmxext( &p_dst[8*i_dst], i_dst, &dct[8] );
- x264_add8x8_idct_mmxext( &p_dst[8*i_dst+8], i_dst, &dct[12] );
+ x264_add8x8_idct_mmx( &p_dst[0], i_dst, &dct[0] );
+ x264_add8x8_idct_mmx( &p_dst[8], i_dst, &dct[4] );
+ x264_add8x8_idct_mmx( &p_dst[8*i_dst], i_dst, &dct[8] );
+ x264_add8x8_idct_mmx( &p_dst[8*i_dst+8], i_dst, &dct[12] );
}
/***********************
#ifndef _I386_DCT_H
#define _I386_DCT_H 1
-void x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
-void x264_sub8x8_dct_mmxext( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
-void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+void x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+void x264_sub8x8_dct_mmx( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+void x264_sub16x16_dct_mmx( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
-void x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] );
-void x264_add8x8_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] );
-void x264_add16x16_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
+void x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] );
+void x264_add8x8_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] );
+void x264_add16x16_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
-void x264_dct4x4dc_mmxext( int16_t d[4][4] );
-void x264_idct4x4dc_mmxext( int16_t d[4][4] );
+void x264_dct4x4dc_mmx( int16_t d[4][4] );
+void x264_idct4x4dc_mmx( int16_t d[4][4] );
void x264_sub8x8_dct8_mmx( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void x264_sub16x16_dct8_mmx( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
cglobal x264_pixel_avg_weight_w8_mmxext
cglobal x264_pixel_avg_weight_w16_mmxext
-cglobal x264_mc_copy_w4_mmxext
-cglobal x264_mc_copy_w8_mmxext
-cglobal x264_mc_copy_w16_mmxext
+cglobal x264_mc_copy_w4_mmx
+cglobal x264_mc_copy_w8_mmx
+cglobal x264_mc_copy_w16_mmx
cglobal x264_mc_copy_w16_sse2
cglobal x264_mc_chroma_mmxext
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride,
-; uint8_t *dst, int i_dst_stride, int i_height )
+; void x264_mc_copy_w4_mmx( uint8_t *dst, int i_dst_stride,
+; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
-x264_mc_copy_w4_mmxext:
+x264_mc_copy_w4_mmx:
push ebx
push esi
push edi
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_mc_copy_w8_mmxext( uint8_t *src, int i_src_stride,
-; uint8_t *dst, int i_dst_stride, int i_height )
+; void x264_mc_copy_w8_mmx( uint8_t *dst, int i_dst_stride,
+; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
-x264_mc_copy_w8_mmxext:
+x264_mc_copy_w8_mmx:
push ebx
push esi
push edi
ALIGN 16
;-----------------------------------------------------------------------------
-; void x264_mc_copy_w16_mmxext( uint8_t *src, int i_src_stride,
-; uint8_t *dst, int i_dst_stride, int i_height )
+; void x264_mc_copy_w16_mmx( uint8_t *dst, int i_dst_stride,
+; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
-x264_mc_copy_w16_mmxext:
+x264_mc_copy_w16_mmx:
push ebx
push esi
push edi
add edi, [picesp + tdstp2]
mov [picesp + tdst2], edi
- mov ebp, [picesp + theight]
- dec ebp
- test ebp, ebp
- mov [picesp + theight], ebp
+ dec dword [picesp + theight]
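+ ; a single read-modify-write dec on the in-memory counter sets ZF itself,
+ ; replacing the load/dec/test/store sequence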
jnz loopcy
picpop ebx
loophy:
- dec ecx
xor eax, eax
loophx:
add esi, [esp + 24] ; src_pitch
add edi, [esp + 16] ; dst_pitch
- test ecx, ecx
+ dec ecx
jnz loophy
pop esi
extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
-extern void x264_mc_copy_w4_mmxext( uint8_t *, int, uint8_t *, int, int );
-extern void x264_mc_copy_w8_mmxext( uint8_t *, int, uint8_t *, int, int );
-extern void x264_mc_copy_w16_mmxext( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w4_mmx( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w8_mmx( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w16_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_sse2( uint8_t *, int, uint8_t *, int, int );
#define AVG(W,H) \
NULL,
x264_pixel_avg_w16_mmxext
};
-static void (* const x264_mc_copy_wtab_mmxext[5])( uint8_t *, int, uint8_t *, int, int ) =
+static void (* const x264_mc_copy_wtab_mmx[5])( uint8_t *, int, uint8_t *, int, int ) =
{
NULL,
- x264_mc_copy_w4_mmxext,
- x264_mc_copy_w8_mmxext,
+ x264_mc_copy_w4_mmx,
+ x264_mc_copy_w8_mmx,
NULL,
- x264_mc_copy_w16_mmxext
+ x264_mc_copy_w16_mmx
};
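+/* i_width>>2 indexes the table: widths 4, 8 and 16 land in slots 1, 2 and 4;
+ * slots 0 and 3 stay NULL */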
static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
}
else
{
- x264_mc_copy_wtab_mmxext[i_width>>2](
+ x264_mc_copy_wtab_mmx[i_width>>2](
dst, i_dst_stride, src1, i_src_stride, i_height );
}
}
pf->avg_weight[PIXEL_4x4] = x264_pixel_avg_weight_4x4_mmxext;
// avg_weight_4x8 is rare and 4x2 is not used
- pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmxext;
- pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmxext;
- pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmxext;
+ pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
+ pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
+ pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
}
void x264_mc_sse2_init( x264_mc_functions_t *pf )
{
cglobal x264_pixel_sad_pde_16x8_mmxext
cglobal x264_pixel_sad_pde_8x16_mmxext
-cglobal x264_pixel_ssd_16x16_mmxext
-cglobal x264_pixel_ssd_16x8_mmxext
-cglobal x264_pixel_ssd_8x16_mmxext
-cglobal x264_pixel_ssd_8x8_mmxext
-cglobal x264_pixel_ssd_8x4_mmxext
-cglobal x264_pixel_ssd_4x8_mmxext
-cglobal x264_pixel_ssd_4x4_mmxext
+cglobal x264_pixel_ssd_16x16_mmx
+cglobal x264_pixel_ssd_16x8_mmx
+cglobal x264_pixel_ssd_8x16_mmx
+cglobal x264_pixel_ssd_8x8_mmx
+cglobal x264_pixel_ssd_8x4_mmx
+cglobal x264_pixel_ssd_4x8_mmx
+cglobal x264_pixel_ssd_4x4_mmx
cglobal x264_pixel_satd_4x4_mmxext
cglobal x264_pixel_satd_4x8_mmxext
ALIGN 16
;-----------------------------------------------------------------------------
-; int __cdecl x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
+; int __cdecl x264_pixel_ssd_16x16_mmx( uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
-x264_pixel_ssd_16x16_mmxext:
+x264_pixel_ssd_16x16_mmx:
SSD_START
SSD_INC_8x16P
SSD_INC_8x16P
SSD_END
ALIGN 16
-x264_pixel_ssd_16x8_mmxext:
+x264_pixel_ssd_16x8_mmx:
SSD_START
SSD_INC_8x16P
SSD_END
ALIGN 16
-x264_pixel_ssd_8x16_mmxext:
+x264_pixel_ssd_8x16_mmx:
SSD_START
SSD_INC_4x8P
SSD_INC_4x8P
SSD_END
ALIGN 16
-x264_pixel_ssd_8x8_mmxext:
+x264_pixel_ssd_8x8_mmx:
SSD_START
SSD_INC_4x8P
SSD_INC_4x8P
SSD_END
ALIGN 16
-x264_pixel_ssd_8x4_mmxext:
+x264_pixel_ssd_8x4_mmx:
SSD_START
SSD_INC_4x8P
SSD_END
ALIGN 16
-x264_pixel_ssd_4x8_mmxext:
+x264_pixel_ssd_4x8_mmx:
SSD_START
SSD_INC_4x4P
SSD_INC_4x4P
SSD_END
ALIGN 16
-x264_pixel_ssd_4x4_mmxext:
+x264_pixel_ssd_4x4_mmx:
SSD_START
SSD_INC_4x4P
SSD_END
int x264_pixel_sad_pde_16x8_mmxext( uint8_t *, int, uint8_t *, int, int );
int x264_pixel_sad_pde_8x16_mmxext( uint8_t *, int, uint8_t *, int, int );
-int x264_pixel_ssd_16x16_mmxext( uint8_t *, int, uint8_t *, int );
-int x264_pixel_ssd_16x8_mmxext( uint8_t *, int, uint8_t *, int );
-int x264_pixel_ssd_8x16_mmxext( uint8_t *, int, uint8_t *, int );
-int x264_pixel_ssd_8x8_mmxext( uint8_t *, int, uint8_t *, int );
-int x264_pixel_ssd_8x4_mmxext( uint8_t *, int, uint8_t *, int );
-int x264_pixel_ssd_4x8_mmxext( uint8_t *, int, uint8_t *, int );
-int x264_pixel_ssd_4x4_mmxext( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_16x16_mmx( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_16x8_mmx( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_8x16_mmx( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_8x8_mmx( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_8x4_mmx( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_4x8_mmx( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_4x4_mmx( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_16x16_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_16x8_mmxext( uint8_t *, int, uint8_t *, int );
pixf->sa8d[PIXEL_8x8] = pixel_sa8d_8x8;
#ifdef HAVE_MMXEXT
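+ /* ssd needs only plain MMX, hence the weaker CPU flag check below */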
+ if( cpu&X264_CPU_MMX )
+ {
+ pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_mmx;
+ pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_mmx;
+ pixf->ssd[PIXEL_8x16] = x264_pixel_ssd_8x16_mmx;
+ pixf->ssd[PIXEL_8x8] = x264_pixel_ssd_8x8_mmx;
+ pixf->ssd[PIXEL_8x4] = x264_pixel_ssd_8x4_mmx;
+ pixf->ssd[PIXEL_4x8] = x264_pixel_ssd_4x8_mmx;
+ pixf->ssd[PIXEL_4x4] = x264_pixel_ssd_4x4_mmx;
+ }
+
if( cpu&X264_CPU_MMXEXT )
{
pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_mmxext;
pixf->sad_pde[PIXEL_16x8 ] = x264_pixel_sad_pde_16x8_mmxext;
pixf->sad_pde[PIXEL_8x16 ] = x264_pixel_sad_pde_8x16_mmxext;
- pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_mmxext;
- pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_mmxext;
- pixf->ssd[PIXEL_8x16] = x264_pixel_ssd_8x16_mmxext;
- pixf->ssd[PIXEL_8x8] = x264_pixel_ssd_8x8_mmxext;
- pixf->ssd[PIXEL_8x4] = x264_pixel_ssd_8x4_mmxext;
- pixf->ssd[PIXEL_4x8] = x264_pixel_ssd_4x8_mmxext;
- pixf->ssd[PIXEL_4x4] = x264_pixel_ssd_4x4_mmxext;
-
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_mmxext;
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_mmxext;
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_mmxext;
}
#endif
#ifdef ARCH_UltraSparc
- pixf->sad[PIXEL_8x8] = x264_pixel_sad_8x8_vis;
- pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_vis;
- pixf->sad[PIXEL_16x8] = x264_pixel_sad_16x8_vis;
- pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_vis;
+ pixf->sad[PIXEL_8x8] = x264_pixel_sad_8x8_vis;
+ pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_vis;
+ pixf->sad[PIXEL_16x8] = x264_pixel_sad_16x8_vis;
+ pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_vis;
#endif
}