From: Anton Mitrofanov Date: Tue, 8 Sep 2020 13:36:24 +0000 (+0300) Subject: x86: Fix memory operands for inline asm X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=37329c4f103327b6d306c8148c79d9658419231b;p=libx264 x86: Fix memory operands for inline asm --- diff --git a/common/base.h b/common/base.h index 414dc069..555cc685 100644 --- a/common/base.h +++ b/common/base.h @@ -76,6 +76,17 @@ typedef union { x264_uint128_t i; uint64_t q[2]; uint32_t d[4]; uint16_t w[8]; u #define CP64(dst,src) M64(dst) = M64(src) #define CP128(dst,src) M128(dst) = M128(src) +/* Macros for memory constraints of inline asm */ +#if defined(__GNUC__) && __GNUC__ >= 8 && !defined(__clang__) && !defined(__INTEL_COMPILER) +#define MEM_FIX(x, t, s) (*(t (*)[s])(x)) +#define MEM_DYN(x, t) (*(t (*)[])(x)) +#else +//older versions of gcc prefer casting to structure instead of array +#define MEM_FIX(x, t, s) (*(struct { t a[s]; } (*))(x)) +//let's set an arbitrary large constant size +#define MEM_DYN(x, t) MEM_FIX(x, t, 4096) +#endif + /**************************************************************************** * Constants ****************************************************************************/ diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c index 83a853ef..dde123b3 100644 --- a/common/x86/predict-c.c +++ b/common/x86/predict-c.c @@ -91,8 +91,10 @@ static void predict_16x16_p_##name( pixel *src )\ "paddd %%xmm1, %%xmm0 \n"\ "movd %%xmm0, %0 \n"\ :"=r"(H)\ - :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\ - "m"(*pw_12345678), "m"(*pw_m87654321)\ + :"m"(MEM_FIX(&src[-FDEC_STRIDE-1], const pixel, 8)),\ + "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\ + "m"(MEM_FIX(pw_12345678, const int16_t, 8)),\ + "m"(MEM_FIX(pw_m87654321, const int16_t, 8))\ :"xmm0", "xmm1"\ ); #else // !HIGH_BIT_DEPTH @@ -111,8 +113,11 @@ static void predict_16x16_p_##name( pixel *src )\ "movd %%mm0, %0 \n"\ "movswl %w0, %0 \n"\ :"=r"(H)\ - :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\ - "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\ + :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\ + "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\ + "m"(MEM_FIX(&src[-FDEC_STRIDE-8], const pixel, 8)),\ + "m"(MEM_FIX(pb_12345678, const int8_t, 8)),\ + "m"(MEM_FIX(pb_m87654321, const int8_t, 8))\ :"mm0", "mm1"\ ); #endif // HIGH_BIT_DEPTH @@ -231,7 +236,8 @@ static void predict_8x8c_p_##name( pixel *src )\ "paddd %%xmm1, %%xmm0 \n"\ "movd %%xmm0, %0 \n"\ :"=r"(H)\ - :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\ + :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\ + "m"(MEM_FIX(pw_m32101234, const int16_t, 8))\ :"xmm0", "xmm1"\ ); #else // !HIGH_BIT_DEPTH @@ -246,7 +252,8 @@ static void predict_8x8c_p_##name( pixel *src )\ "movd %%mm0, %0 \n"\ "movswl %w0, %0 \n"\ :"=r"(H)\ - :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\ + :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\ + "m"(MEM_FIX(pb_m32101234, const int8_t, 8))\ :"mm0", "mm1"\ ); #endif // HIGH_BIT_DEPTH diff --git a/common/x86/util.h b/common/x86/util.h index 4d4912d6..db0c7fe7 100644 --- a/common/x86/util.h +++ b/common/x86/util.h @@ -91,7 +91,7 @@ static ALWAYS_INLINE int x264_predictor_difference_mmx2( int16_t (*mvc)[2], intp "paddd %%mm0, %%mm4 \n" "movd %%mm4, %0 \n" :"=r"(sum), "+r"(i_mvc) - :"r"(mvc), "m"(M64( mvc )), "m"(pw_1) + :"r"(mvc), "m"(MEM_DYN( mvc, const int16_t )), "m"(pw_1) :"mm0", "mm2", "mm3", "mm4", "cc" ); return sum; @@ -179,8 +179,8 @@ static ALWAYS_INLINE int x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (* "and $1, %k2 \n" "sub %2, %4 \n" // output += !(mv == pmv || mv == 0) "3: \n" - :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i) - :"r"(dst), "g"(pmv), "m"(pd_32), "m"(M64( mvc )) + :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i) + :"r"(dst), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t )) :"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "cc" ); return i; @@ -247,8 +247,8 @@ static ALWAYS_INLINE int x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16 "and $1, %k2 \n" "sub %2, %4 \n" "3: \n" - :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i) - :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(M64( mvc )) + :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i) + :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t )) :"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "cc" ); return i;