]> granicus.if.org Git - clang/commitdiff
AVX-512: Implemented AVX-512 FMA intrinsics and tests.
authorElena Demikhovsky <elena.demikhovsky@intel.com>
Mon, 29 Jun 2015 09:20:57 +0000 (09:20 +0000)
committerElena Demikhovsky <elena.demikhovsky@intel.com>
Mon, 29 Jun 2015 09:20:57 +0000 (09:20 +0000)
by Igor Breger

http://reviews.llvm.org/D10797

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@240928 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
lib/Headers/avx512fintrin.h
lib/Headers/avx512vlintrin.h
test/CodeGen/avx512f-builtins.c
test/CodeGen/avx512vl-builtins.c

index 00c1340ac37fc39c55e4be716d72d1dde5cec709..8c70516c029fb852d9a2c4cbf6ba6a24dcfcf174 100644 (file)
@@ -722,12 +722,72 @@ BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "")
 BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "")
 BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "")
 BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "")
-BUILTIN(__builtin_ia32_vfmaddpd512_mask,  "V8dV8dV8dV8dUcIi", "")
-BUILTIN(__builtin_ia32_vfmsubpd512_mask,  "V8dV8dV8dV8dUcIi", "")
-BUILTIN(__builtin_ia32_vfnmaddpd512_mask, "V8dV8dV8dV8dUcIi", "")
-BUILTIN(__builtin_ia32_vfmaddps512_mask,  "V16fV16fV16fV16fUsIi", "")
-BUILTIN(__builtin_ia32_vfmsubps512_mask,  "V16fV16fV16fV16fUsIi", "")
-BUILTIN(__builtin_ia32_vfnmaddps512_mask, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddpd128_mask,     "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd128_mask3,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd128_maskz,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd256_mask,     "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd256_mask3,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd256_maskz,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd512_mask,     "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddpd512_mask3,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddpd512_maskz,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddps128_mask,     "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps128_mask3,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps128_maskz,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps256_mask,     "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps256_mask3,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps256_maskz,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps512_mask,     "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddps512_mask3,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddps512_maskz,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd128_mask,  "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd128_mask3, "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd128_maskz, "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd256_mask,  "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd256_mask3, "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd256_maskz, "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd512_mask,  "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubps128_mask,  "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps128_mask3, "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps128_maskz, "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps256_mask,  "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps256_mask3, "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps256_maskz, "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps512_mask,  "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmsubpd128_mask3,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmsubpd256_mask3,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmsubpd512_mask3,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmsubps128_mask3,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmsubps256_mask3,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmsubps512_mask3,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd128_mask3, "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd256_mask3, "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmsubaddps128_mask3, "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddps256_mask3, "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfnmaddpd128_mask,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfnmaddpd256_mask,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfnmaddpd512_mask,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfnmaddps128_mask,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfnmaddps256_mask,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfnmaddps512_mask,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfnmsubpd128_mask,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd128_mask3,   "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd256_mask,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd256_mask3,   "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd512_mask,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfnmsubpd512_mask3,   "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfnmsubps128_mask,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps128_mask3,   "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps256_mask,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps256_mask3,   "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps512_mask,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfnmsubps512_mask3,   "V16fV16fV16fV16fUsIi", "")
 
 // XOP
 BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "")
index 98eb73b3113faeb952fc7f692fd5a8445c822b3e..7f1f8327ec5c0f703e8cad6e5040b85d75891ecc 100644 (file)
@@ -881,72 +881,776 @@ _mm512_abs_epi32(__m512i __A)
   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
                                           -1, _MM_FROUND_CUR_DIRECTION); })
 
+#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), (__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), (__v8df) (C), \
+                                             (__mmask8) (U), (R)); })
+
+
+#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), -(__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), -(__v8df) (C), \
+                                             (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
+                                              (__v8df) (B), -(__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
+                                             (__v8df) (B), (__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
+                                             (__v8df) (B), -(__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
+                                              (__v8df) (B), -(__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
 static __inline__ __m512d DEFAULT_FN_ATTRS
 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d)
-    __builtin_ia32_vfmaddpd512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask8) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m512d DEFAULT_FN_ATTRS
 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d)
-    __builtin_ia32_vfmsubpd512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask8) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    -(__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    -(__v8df) __C,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     -(__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m512d DEFAULT_FN_ATTRS
 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d)
-    __builtin_ia32_vfnmaddpd512_mask(__A,
-                                     __B,
-                                     __C,
-                                     (__mmask8) -1,
-                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+                                                    (__v8df) __B,
+                                                    -(__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+                                                     (__v8df) __B,
+                                                     -(__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), (__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), (__v16sf) (C), \
+                                            (__mmask16) (U), (R)); })
+
+
+#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), -(__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), -(__v16sf) (C), \
+                                            (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
+                                             (__v16sf) (B), -(__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
+                                            (__v16sf) (B), (__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
+                                            (__v16sf) (B), -(__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
+                                             (__v16sf) (B), -(__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
 static __inline__ __m512 DEFAULT_FN_ATTRS
 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512)
-    __builtin_ia32_vfmaddps512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask16) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m512 DEFAULT_FN_ATTRS
 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512)
-    __builtin_ia32_vfmsubps512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask16) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   -(__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   -(__v16sf) __C,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    -(__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m512 DEFAULT_FN_ATTRS
 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512)
-    __builtin_ia32_vfnmaddps512_mask(__A,
-                                     __B,
-                                     __C,
-                                     (__mmask16) -1,
-                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   -(__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    -(__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), (__v8df) (C), \
+                                                (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), (__v8df) (C), \
+                                                (__mmask8) (U), (R)); })
+
+
+#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \
+                                                 (__v8df) (B), (__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
+                                                 (__v8df) (B), (__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), -(__v8df) (C), \
+                                                (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), -(__v8df) (C), \
+                                                (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
+                                                 (__v8df) (B), -(__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       (__v8df) __C,
+                                                       (__mmask8) -1,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       (__v8df) __C,
+                                                       (__mmask8) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        (__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        (__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       -(__v8df) __C,
+                                                       (__mmask8) -1,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       -(__v8df) __C,
+                                                       (__mmask8) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        -(__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), (__v16sf) (C), \
+                                               (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), (__v16sf) (C), \
+                                               (__mmask16) (U), (R)); })
+
+
+#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \
+                                                (__v16sf) (B), (__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
+                                                (__v16sf) (B), (__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), -(__v16sf) (C), \
+                                               (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), -(__v16sf) (C), \
+                                               (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
+                                                (__v16sf) (B), -(__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      (__v16sf) __C,
+                                                      (__mmask16) -1,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      (__v16sf) __C,
+                                                      (__mmask16) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      -(__v16sf) __C,
+                                                      (__mmask16) -1,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      -(__v16sf) __C,
+                                                      (__mmask16) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       -(__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
+                                                 (__v8df) (B), (__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        (__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
+                                                (__v16sf) (B), (__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
+                                               (__v8df) (B), (__v8df) (C), \
+                                               (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d DEFAULT_FN_ATTRS
+_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
+                                                      (__v8df) __B,
+                                                      (__v8df) __C,
+                                                      (__mmask8) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
+                                              (__v16sf) (B), (__v16sf) (C), \
+                                              (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 DEFAULT_FN_ATTRS
+_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
+                                                     (__v16sf) __B,
+                                                     (__v16sf) __C,
+                                                     (__mmask16) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+
+
 /* Vector permutations */
 
 static __inline __m512i DEFAULT_FN_ATTRS
index 59ff5ebfd5745348b3cf1dd2774672a31a6dc674..8ead2dac7987069f37028e5fb1d811615c3d20be 100644 (file)
@@ -1320,6 +1320,663 @@ _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
                                          (__v2df)(__m128)(b), \
                                          (p), (__mmask8)(m)); })
 
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+                                                    (__v2df) __B,
+                                                    (__v2df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+                                                    (__v2df) __B,
+                                                    -(__v2df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     -(__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+                                                     (__v2df) __B,
+                                                     -(__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+                                                    (__v4df) __B,
+                                                    (__v4df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+                                                    (__v4df) __B,
+                                                    -(__v4df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     -(__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+                                                     (__v4df) __B,
+                                                     -(__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+                                                   (__v4sf) __B,
+                                                   (__v4sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+                                                   (__v4sf) __B,
+                                                   -(__v4sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    -(__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    -(__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+                                                   (__v8sf) __B,
+                                                   (__v8sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+                                                   (__v8sf) __B,
+                                                   -(__v8sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    -(__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    -(__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+                                                       (__v2df) __B,
+                                                       (__v2df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        (__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        (__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+                                                       (__v2df) __B,
+                                                       -(__v2df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        -(__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+                                                       (__v4df) __B,
+                                                       (__v4df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        (__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        (__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+                                                       (__v4df) __B,
+                                                       -(__v4df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        -(__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+                                                      (__v4sf) __B,
+                                                      (__v4sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       (__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       (__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+                                                      (__v4sf) __B,
+                                                      -(__v4sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       -(__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
+                         __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+                                                      (__v8sf) __B,
+                                                      (__v8sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       (__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       (__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+                                                      (__v8sf) __B,
+                                                      -(__v8sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       -(__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        (__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        (__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       (__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       (__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
+_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
+                                                      (__v2df) __B,
+                                                      (__v2df) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d DEFAULT_FN_ATTRS
+_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
+                                                      (__v4df) __B,
+                                                      (__v4df) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
+_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
+                                                     (__v4sf) __B,
+                                                     (__v4sf) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 DEFAULT_FN_ATTRS
+_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
+                                                     (__v8sf) __B,
+                                                     (__v8sf) __C,
+                                                     (__mmask8) __U);
+}
+
 #undef DEFAULT_FN_ATTRS
 
 #endif /* __AVX512VLINTRIN_H */
index a49a1982bf47ce05c9fd6baddcf2d0dce6b1c53a..112dfd8b6ee124d37bfc259056aeccf81fa2f558 100644 (file)
@@ -201,11 +201,486 @@ __m512d test_mm512_broadcastsd_pd(__m128d a)
   return _mm512_broadcastsd_pd(a);
 }
 
-__m512i test_mm512_fmadd_pd(__m512d a, __m512d b, __m512d c)
-{
+__m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+
+__m512d test_mm512_mask_fmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fnmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fnmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fnmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
   // CHECK-LABEL: @test_mm512_fmadd_pd
-  // CHECK: @llvm.x86.fma.mask.vfmadd.pd.512
-  return _mm512_fmadd_pd(a, b, c);
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmadd_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fnmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fnmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fnmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmadd_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmaddsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmaddsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmaddsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.512
+  return _mm512_mask3_fmaddsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmaddsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmsubadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmsubadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmsubadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmaddsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.512
+  return _mm512_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmsubadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmaddsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmaddsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmaddsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.512
+  return _mm512_mask3_fmaddsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmaddsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmsubadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmsubadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmsubadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmaddsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.512
+  return _mm512_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmsubadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+__m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.512
+  return _mm512_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.512
+  return _mm512_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.512
+  return _mm512_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.512
+  return _mm512_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+__m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.512
+  return _mm512_mask3_fmsubadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.512
+  return _mm512_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.512
+  return _mm512_mask3_fmsubadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.512
+  return _mm512_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+__m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.512
+  return _mm512_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.512
+  return _mm512_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.512
+  return _mm512_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.512
+  return _mm512_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.512
+  return _mm512_mask_fnmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.512
+  return _mm512_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.512
+  return _mm512_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.512
+  return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.512
+  return _mm512_mask_fnmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.512
+  return _mm512_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.512
+  return _mm512_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.512
+  return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U);
 }
 
 __mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
index 9446d467f78ef34c5ce41d37b6997855385f8f00..ce2e5f2aafa0f30a1bcd0ec5a13ba49171eb6a79 100644 (file)
@@ -1121,3 +1121,439 @@ __mmask8 test_mm128_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
   // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
   return _mm128_mask_cmp_pd_mask(m, __A, __B, 0);
 }
+
+
+//igorb
+
+__m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.128
+  return _mm_mask_fmadd_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.128
+  return _mm_mask_fmsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.128
+  return _mm_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.128
+  return _mm_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.256
+  return _mm256_mask_fmadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.256
+  return _mm256_mask_fmsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.256
+  return _mm256_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.256
+  return _mm256_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+
+__m128 test_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.128
+  return _mm_mask_fmadd_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.128
+  return _mm_mask_fmsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.128
+  return _mm_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.128
+  return _mm_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.256
+  return _mm256_mask_fmadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.256
+  return _mm256_mask_fmsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.256
+  return _mm256_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.256
+  return _mm256_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+
+__m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.128
+  return _mm_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.128
+  return _mm_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.128
+  return _mm_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.128
+  return _mm_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.128
+  return _mm_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.256
+  return _mm256_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.256
+  return _mm256_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.256
+  return _mm256_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.256
+  return _mm256_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.256
+  return _mm256_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+
+__m128 test_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.128
+  return _mm_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.128
+  return _mm_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.128
+  return _mm_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.128
+  return _mm_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.128
+  return _mm_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.256
+  return _mm256_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.256
+  return _mm256_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.256
+  return _mm256_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.256
+  return _mm256_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.256
+  return _mm256_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+
+__m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.128
+  return _mm_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.256
+  return _mm256_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.128
+  return _mm_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.256
+  return _mm256_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.128
+  return _mm_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.256
+  return _mm256_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.128
+  return _mm_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.256
+  return _mm256_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.128
+  return _mm_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.256
+  return _mm256_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.128
+  return _mm_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.256
+  return _mm256_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.128
+  return _mm_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.128
+  return _mm_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.256
+  return _mm256_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.256
+  return _mm256_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.128
+  return _mm_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.128
+  return _mm_mask3_fnmsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.256
+  return _mm256_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.256
+  return _mm256_mask3_fnmsub_ps(__A, __B, __C, __U);
+}
+