x86: Fix memory operands for inline asm

author Anton Mitrofanov <BugMaster@narod.ru>

Tue, 8 Sep 2020 13:36:24 +0000 (16:36 +0300)

committer Anton Mitrofanov <bugmaster@narod.ru>

Sun, 25 Oct 2020 17:10:15 +0000 (18:10 +0100)
author Anton Mitrofanov <BugMaster@narod.ru>
Tue, 8 Sep 2020 13:36:24 +0000 (16:36 +0300)
committer Anton Mitrofanov <bugmaster@narod.ru>
Sun, 25 Oct 2020 17:10:15 +0000 (18:10 +0100)
diff --git a/common/base.h b/common/base.h

index 414dc06983bd28da6d9cd3f8743bf5abac48c5c9..555cc6852fc88cf8565f4c708e852c9c8d59b768 100644 (file)
--- a/common/base.h
+++ b/common/base.h
@@ -76,6 +76,17 @@ typedef union { x264_uint128_t i; uint64_t q[2]; uint32_t d[4]; uint16_t w[8]; u
  #define CP64(dst,src) M64(dst) = M64(src)
  #define CP128(dst,src) M128(dst) = M128(src)
  
+/* Macros for memory constraints of inline asm: MEM_FIX(x, t, s) yields an lvalue covering s elements of type t at pointer x; MEM_DYN(x, t) is for when the element count is not a compile-time constant */
+#if defined(__GNUC__) && __GNUC__ >= 8 && !defined(__clang__) && !defined(__INTEL_COMPILER)
+#define MEM_FIX(x, t, s) (*(t (*)[s])(x))
+#define MEM_DYN(x, t) (*(t (*)[])(x))
+#else
+//older versions of gcc prefer casting to a structure instead of an array (this branch is also taken by clang, ICC and any other compiler failing the check above)
+#define MEM_FIX(x, t, s) (*(struct { t a[s]; } (*))(x))
+//use an arbitrarily large constant size (4096 elements of t) as the array bound inside the structure
+#define MEM_DYN(x, t) MEM_FIX(x, t, 4096)
+#endif
+
  /****************************************************************************
   * Constants
   ****************************************************************************/
diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c

index 83a853ef92bb3849c8145dcb234f70e286221336..dde123b34fe44f091df2a3cc70caf7fe0a959d1e 100644 (file)
--- a/common/x86/predict-c.c
+++ b/common/x86/predict-c.c
@@ -91,8 +91,10 @@ static void predict_16x16_p_##name( pixel *src )\
          "paddd        %%xmm1, %%xmm0 \n"\
          "movd         %%xmm0, %0     \n"\
          :"=r"(H)\
-        :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\
-         "m"(*pw_12345678), "m"(*pw_m87654321)\
+        :"m"(MEM_FIX(&src[-FDEC_STRIDE-1], const pixel, 8)),\
+         "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\
+         "m"(MEM_FIX(pw_12345678, const int16_t, 8)),\
+         "m"(MEM_FIX(pw_m87654321, const int16_t, 8))\
          :"xmm0", "xmm1"\
      );
  #else // !HIGH_BIT_DEPTH
@@ -111,8 +113,11 @@ static void predict_16x16_p_##name( pixel *src )\
          "movd        %%mm0, %0    \n"\
          "movswl        %w0, %0    \n"\
          :"=r"(H)\
-        :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\
-         "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\
+        :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+         "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\
+         "m"(MEM_FIX(&src[-FDEC_STRIDE-8], const pixel, 8)),\
+         "m"(MEM_FIX(pb_12345678, const int8_t, 8)),\
+         "m"(MEM_FIX(pb_m87654321, const int8_t, 8))\
          :"mm0", "mm1"\
      );
  #endif // HIGH_BIT_DEPTH
@@ -231,7 +236,8 @@ static void predict_8x8c_p_##name( pixel *src )\
          "paddd        %%xmm1, %%xmm0 \n"\
          "movd         %%xmm0, %0     \n"\
          :"=r"(H)\
-        :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\
+        :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+         "m"(MEM_FIX(pw_m32101234, const int16_t, 8))\
          :"xmm0", "xmm1"\
      );
  #else // !HIGH_BIT_DEPTH
@@ -246,7 +252,8 @@ static void predict_8x8c_p_##name( pixel *src )\
          "movd        %%mm0, %0    \n"\
          "movswl        %w0, %0    \n"\
          :"=r"(H)\
-        :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\
+        :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+         "m"(MEM_FIX(pb_m32101234, const int8_t, 8))\
          :"mm0", "mm1"\
      );
  #endif // HIGH_BIT_DEPTH
diff --git a/common/x86/util.h b/common/x86/util.h

index 4d4912d6ec03cce6e6828f7f48ad266c46c393c8..db0c7fe7f2e1ebaaa37ea96b5fdad7fbe0b70cc3 100644 (file)
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -91,7 +91,7 @@ static ALWAYS_INLINE int x264_predictor_difference_mmx2( int16_t (*mvc)[2], intp
          "paddd   %%mm0, %%mm4 \n"
          "movd    %%mm4, %0    \n"
          :"=r"(sum), "+r"(i_mvc)
-        :"r"(mvc), "m"(M64( mvc )), "m"(pw_1)
+        :"r"(mvc), "m"(MEM_DYN( mvc, const int16_t )), "m"(pw_1)
          :"mm0", "mm2", "mm3", "mm4", "cc"
      );
      return sum;
@@ -179,8 +179,8 @@ static ALWAYS_INLINE int x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*
          "and          $1, %k2   \n"
          "sub          %2, %4    \n" // output += !(mv == pmv || mv == 0)
          "3:                     \n"
-        :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
-        :"r"(dst), "g"(pmv), "m"(pd_32), "m"(M64( mvc ))
+        :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
+        :"r"(dst), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t ))
          :"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "cc"
      );
      return i;
@@ -247,8 +247,8 @@ static ALWAYS_INLINE int x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16
          "and          $1, %k2   \n"
          "sub          %2, %4    \n"
          "3:                     \n"
-        :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
-        :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(M64( mvc ))
+        :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
+        :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t ))
          :"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "cc"
      );
      return i;
author	Anton Mitrofanov <BugMaster@narod.ru>
	Tue, 8 Sep 2020 13:36:24 +0000 (16:36 +0300)
committer	Anton Mitrofanov <bugmaster@narod.ru>
	Sun, 25 Oct 2020 17:10:15 +0000 (18:10 +0100)
common/base.h		patch \| blob \| history
common/x86/predict-c.c		patch \| blob \| history
common/x86/util.h		patch \| blob \| history