From 9df377f87702c82a2202d34919c07e32c60b40ae Mon Sep 17 00:00:00 2001 From: Anton Mitrofanov Date: Thu, 28 Aug 2014 20:13:13 +0400 Subject: [PATCH] Fix inappropriate instruction use --- common/dct.c | 2 +- common/quant.c | 4 ++-- common/x86/dct-a.asm | 2 +- common/x86/dct.h | 2 +- common/x86/pixel-a.asm | 2 +- common/x86/quant-a.asm | 2 +- common/x86/quant.h | 4 ++-- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/common/dct.c b/common/dct.c index f5900efd..08f4e893 100644 --- a/common/dct.c +++ b/common/dct.c @@ -611,7 +611,6 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf ) { dctf->sub4x4_dct = x264_sub4x4_dct_mmx; dctf->add4x4_idct = x264_add4x4_idct_mmx; - dctf->dct4x4dc = x264_dct4x4dc_mmx; dctf->idct4x4dc = x264_idct4x4dc_mmx; dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2; @@ -630,6 +629,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf ) if( cpu&X264_CPU_MMX2 ) { + dctf->dct4x4dc = x264_dct4x4dc_mmx2; dctf->add8x8_idct_dc = x264_add8x8_idct_dc_mmx2; dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2; } diff --git a/common/quant.c b/common/quant.c index d7b69115..31d8901d 100644 --- a/common/quant.c +++ b/common/quant.c @@ -558,8 +558,6 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ) if( cpu&X264_CPU_MMX ) { #if ARCH_X86 - pf->quant_4x4 = x264_quant_4x4_mmx; - pf->quant_8x8 = x264_quant_8x8_mmx; pf->dequant_4x4 = x264_dequant_4x4_mmx; pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2; pf->dequant_8x8 = x264_dequant_8x8_mmx; @@ -576,6 +574,8 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ) { pf->quant_2x2_dc = x264_quant_2x2_dc_mmx2; #if ARCH_X86 + pf->quant_4x4 = x264_quant_4x4_mmx2; + pf->quant_8x8 = x264_quant_8x8_mmx2; pf->quant_4x4_dc = x264_quant_4x4_dc_mmx2; pf->decimate_score15 = x264_decimate_score15_mmx2; pf->decimate_score16 = x264_decimate_score16_mmx2; diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm index 4376e369..bc82ff63 100644 --- a/common/x86/dct-a.asm +++ b/common/x86/dct-a.asm @@ -143,7 +143,7 @@ INIT_XMM avx DCT4x4_DC %else -INIT_MMX mmx +INIT_MMX mmx2 cglobal dct4x4dc, 1,1 movq m3, [r0+24] movq m2, [r0+16] diff --git a/common/x86/dct.h b/common/x86/dct.h index 337a6327..f22a979a 100644 --- a/common/x86/dct.h +++ b/common/x86/dct.h @@ -70,7 +70,7 @@ void x264_add8x8_idct_dc_avx ( pixel *p_dst, dctcoef dct [ 4] ); void x264_add16x16_idct_dc_avx ( pixel *p_dst, dctcoef dct [16] ); void x264_add16x16_idct_dc_avx2 ( uint8_t *p_dst, int16_t dct [16] ); -void x264_dct4x4dc_mmx ( int16_t d[16] ); +void x264_dct4x4dc_mmx2 ( int16_t d[16] ); void x264_dct4x4dc_sse2 ( int32_t d[16] ); void x264_dct4x4dc_avx ( int32_t d[16] ); void x264_idct4x4dc_mmx ( int16_t d[16] ); diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm index 262c5377..f5f6a82e 100644 --- a/common/x86/pixel-a.asm +++ b/common/x86/pixel-a.asm @@ -1600,7 +1600,7 @@ cglobal pixel_satd_4x4, 4,6 %macro SATDS_SSE2 0 %define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH) -%if vertical==0 || HIGH_BIT_DEPTH +%if cpuflag(ssse3) && (vertical==0 || HIGH_BIT_DEPTH) cglobal pixel_satd_4x4, 4, 6, 6 SATD_START_MMX mova m4, [hmul_4p] diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm index fb588d36..731f7d15 100644 --- a/common/x86/quant-a.asm +++ b/common/x86/quant-a.asm @@ -453,7 +453,7 @@ INIT_MMX mmx2 QUANT_DC quant_2x2_dc, 1 %if ARCH_X86_64 == 0 ; not needed because sse2 is faster QUANT_DC quant_4x4_dc, 4 -INIT_MMX mmx +INIT_MMX mmx2 QUANT_AC quant_4x4, 4 QUANT_AC quant_8x8, 16 %endif diff --git a/common/x86/quant.h b/common/x86/quant.h index 1fcb8001..c6a8a9b1 100644 --- a/common/x86/quant.h +++ b/common/x86/quant.h @@ -30,8 +30,8 @@ int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias ); int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias ); -int x264_quant_4x4_mmx( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); -int x264_quant_8x8_mmx( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ); +int x264_quant_4x4_mmx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); +int x264_quant_8x8_mmx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ); int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias ); int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias ); int x264_quant_4x4_sse2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); -- 2.40.0