#define CP64(dst,src) M64(dst) = M64(src)
#define CP128(dst,src) M128(dst) = M128(src)
+/* Macros for memory constraints of inline asm.
+ * Each expands to an lvalue built from a pointer, meant to be passed as an
+ * "m"/"=m" operand so the operand describes a whole range of elements:
+ *   MEM_FIX(x, t, s) - s elements of type t starting at pointer x.
+ *   MEM_DYN(x, t)    - elements of type t starting at pointer x, without a
+ *                      caller-supplied element count. */
+#if defined(__GNUC__) && __GNUC__ >= 8 && !defined(__clang__) && !defined(__INTEL_COMPILER)
+#define MEM_FIX(x, t, s) (*(t (*)[s])(x))
+#define MEM_DYN(x, t) (*(t (*)[])(x))
+#else
+//gcc < 8, clang and icc take this branch: older versions of gcc prefer casting to a structure instead of an array
+#define MEM_FIX(x, t, s) (*(struct { t a[s]; } (*))(x))
+//the structure form needs an element count, so use an arbitrarily large constant one
+#define MEM_DYN(x, t) MEM_FIX(x, t, 4096)
+#endif
+
/****************************************************************************
* Constants
****************************************************************************/
"paddd %%xmm1, %%xmm0 \n"\
"movd %%xmm0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\
- "m"(*pw_12345678), "m"(*pw_m87654321)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE-1], const pixel, 8)),\
+ "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\
+ "m"(MEM_FIX(pw_12345678, const int16_t, 8)),\
+ "m"(MEM_FIX(pw_m87654321, const int16_t, 8))\
:"xmm0", "xmm1"\
);
#else // !HIGH_BIT_DEPTH
"movd %%mm0, %0 \n"\
"movswl %w0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\
- "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+ "m"(MEM_FIX(&src[-FDEC_STRIDE+8], const pixel, 8)),\
+ "m"(MEM_FIX(&src[-FDEC_STRIDE-8], const pixel, 8)),\
+ "m"(MEM_FIX(pb_12345678, const int8_t, 8)),\
+ "m"(MEM_FIX(pb_m87654321, const int8_t, 8))\
:"mm0", "mm1"\
);
#endif // HIGH_BIT_DEPTH
"paddd %%xmm1, %%xmm0 \n"\
"movd %%xmm0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+ "m"(MEM_FIX(pw_m32101234, const int16_t, 8))\
:"xmm0", "xmm1"\
);
#else // !HIGH_BIT_DEPTH
"movd %%mm0, %0 \n"\
"movswl %w0, %0 \n"\
:"=r"(H)\
- :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\
+ :"m"(MEM_FIX(&src[-FDEC_STRIDE], const pixel, 8)),\
+ "m"(MEM_FIX(pb_m32101234, const int8_t, 8))\
:"mm0", "mm1"\
);
#endif // HIGH_BIT_DEPTH
"paddd %%mm0, %%mm4 \n"
"movd %%mm4, %0 \n"
:"=r"(sum), "+r"(i_mvc)
- :"r"(mvc), "m"(M64( mvc )), "m"(pw_1)
+ :"r"(mvc), "m"(MEM_DYN( mvc, const int16_t )), "m"(pw_1)
:"mm0", "mm2", "mm3", "mm4", "cc"
);
return sum;
"and $1, %k2 \n"
"sub %2, %4 \n" // output += !(mv == pmv || mv == 0)
"3: \n"
- :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
- :"r"(dst), "g"(pmv), "m"(pd_32), "m"(M64( mvc ))
+ :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
+ :"r"(dst), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t ))
:"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "cc"
);
return i;
"and $1, %k2 \n"
"sub %2, %4 \n"
"3: \n"
- :"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
- :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(M64( mvc ))
+ :"+r"(mvc), "=m"(MEM_DYN( dst, int16_t )), "+r"(tmp), "+r"(mvc_max), "+r"(i)
+ :"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(MEM_DYN( mvc, const int16_t ))
:"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "cc"
);
return i;