return __W;
}
+#define _mm_fmadd_round_ss(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddss3_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
(__m128d)__builtin_ia32_vfmaddss3_mask((__v2df)(__m128d)(W), \
(__v2df)(__m128d)(A), \
return __W;
}
+#define _mm_fmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
(__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
(__v4sf)(__m128)(A), \
return __W;
}
+#define _mm_fnmadd_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
(__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
-(__v4sf)(__m128)(A), \
return __W;
}
+#define _mm_fnmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
(__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
-(__v4sf)(__m128)(A), \
return __W;
}
+#define _mm_fmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
(__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
(__v2df)(__m128d)(A), \
return __W;
}
+#define _mm_fmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
(__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
(__v2df)(__m128d)(A), \
return __W;
}
+#define _mm_fnmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
(__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
-(__v2df)(__m128d)(A), \
return __W;
}
+#define _mm_fnmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R))
+
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
(__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
-(__v2df)(__m128d)(A), \
return _mm_mask_fmadd_ss(__W, __U, __A, __B);
}
+__m128 test_mm_fmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
+ // CHECK-LABEL: @test_mm_fmadd_round_ss
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+ return _mm_fmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128 test_mm_mask_fmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmadd_round_ss
// CHECK: @llvm.x86.avx512.mask.vfmadd.ss
- return _mm_mask_fmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
__m128 test_mm_maskz_fmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_round_ss
// CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
- return _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
__m128 test_mm_mask3_fmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_round_ss
// CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
- return _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
return _mm_mask_fmsub_ss(__W, __U, __A, __B);
}
+__m128 test_mm_fmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
+ // CHECK-LABEL: @test_mm_fmsub_round_ss
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+ return _mm_fmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128 test_mm_mask_fmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmsub_round_ss
// CHECK: @llvm.x86.avx512.mask.vfmadd.ss
- return _mm_mask_fmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
__m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_round_ss
// CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
- return _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
__m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_round_ss
// CHECK: @llvm.x86.avx512.mask3.vfmsub.ss
- return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
return _mm_mask_fnmadd_ss(__W, __U, __A, __B);
}
+__m128 test_mm_fnmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
+ // CHECK-LABEL: @test_mm_fnmadd_round_ss
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+ return _mm_fnmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128 test_mm_mask_fnmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_round_ss
// CHECK: @llvm.x86.avx512.mask.vfmadd.ss
- return _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
__m128 test_mm_maskz_fnmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_round_ss
// CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
- return _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
__m128 test_mm_mask3_fnmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_round_ss
// CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
- return _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
return _mm_mask_fnmsub_ss(__W, __U, __A, __B);
}
+__m128 test_mm_fnmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
+ // CHECK-LABEL: @test_mm_fnmsub_round_ss
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+ return _mm_fnmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128 test_mm_mask_fnmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_round_ss
// CHECK: @llvm.x86.avx512.mask.vfmadd.ss
- return _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
__m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_round_ss
// CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
- return _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
__m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss
// CHECK: @llvm.x86.avx512.mask3.vfmsub.ss
- return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
return _mm_mask_fmadd_sd(__W, __U, __A, __B);
}
+__m128d test_mm_fmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
+ // CHECK-LABEL: @test_mm_fmadd_round_sd
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+ return _mm_fmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128d test_mm_mask_fmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmadd_round_sd
// CHECK: @llvm.x86.avx512.mask.vfmadd.sd
- return _mm_mask_fmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
__m128d test_mm_maskz_fmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_round_sd
// CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
- return _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
__m128d test_mm_mask3_fmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_round_sd
// CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
- return _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
return _mm_mask_fmsub_sd(__W, __U, __A, __B);
}
+__m128d test_mm_fmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
+ // CHECK-LABEL: @test_mm_fmsub_round_sd
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+ return _mm_fmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128d test_mm_mask_fmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmsub_round_sd
// CHECK: @llvm.x86.avx512.mask.vfmadd.sd
- return _mm_mask_fmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
__m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_round_sd
// CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
- return _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
__m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_round_sd
// CHECK: @llvm.x86.avx512.mask3.vfmsub.sd
- return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
return _mm_mask_fnmadd_sd(__W, __U, __A, __B);
}
+__m128d test_mm_fnmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
+ // CHECK-LABEL: @test_mm_fnmadd_round_sd
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+ return _mm_fnmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128d test_mm_mask_fnmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_round_sd
// CHECK: @llvm.x86.avx512.mask.vfmadd.sd
- return _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
__m128d test_mm_maskz_fnmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_round_sd
// CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
- return _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
__m128d test_mm_mask3_fnmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_round_sd
// CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
- return _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
return _mm_mask_fnmsub_sd(__W, __U, __A, __B);
}
+__m128d test_mm_fnmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
+ // CHECK-LABEL: @test_mm_fnmsub_round_sd
+ // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+ return _mm_fnmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
__m128d test_mm_mask_fnmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_round_sd
// CHECK: @llvm.x86.avx512.mask.vfmadd.sd
- return _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
__m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_round_sd
// CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
- return _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+ return _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
__m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd
// CHECK: @llvm.x86.avx512.mask3.vfmsub.sd
- return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+ return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_permutex_pd(__m512d __X) {