TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:",
- "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:",
- "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:",
- "avx512bf16")
-TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8sV4fV8sUc", "ncV:128:",
- "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256, "V8sV8f", "ncV:256:",
- "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512, "V16sV16f", "ncV:512:",
- "avx512bf16")
-TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:",
- "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:",
- "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:",
- "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:", "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:", "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:", "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8sV4fV8sUc", "ncV:128:", "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256_mask, "V8sV8fV8sUc", "ncV:256:", "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512_mask, "V16sV16fV16sUs", "ncV:512:", "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:", "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:", "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:", "avx512bf16")
// generic select intrinsics
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
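For reference, the prototype strings above decode positionally: "V8sV8fV8sUc" on __builtin_ia32_cvtneps2bf16_256_mask means the builtin returns a vector of 8 shorts and takes a vector of 8 floats (the source), a vector of 8 shorts (the passthrough), and an unsigned char (the mask). A minimal sketch of calling the new masked builtin directly, with illustrative typedefs and function name (user code would normally go through the intrinsic header instead):

    typedef float __v8sf __attribute__((__vector_size__(32)));
    typedef short __v8hi __attribute__((__vector_size__(16)));

    __attribute__((target("avx512bf16,avx512vl")))
    __v8hi cvt256_mask(__v8sf src, __v8hi passthru, unsigned char mask) {
      /* Lanes whose mask bit is 0 keep the corresponding lane of passthru. */
      return __builtin_ia32_cvtneps2bf16_256_mask(src, passthru, mask);
    }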
if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)
Features["avx512bw"] = true;
if (Name == "avx512bf16" && Enabled)
- Features["avx512bw"] = Features["avx512vl"] = true;
+ Features["avx512bw"] = true;
// Also disable VBMI/VBMI2/BF16/BITALG if BWI is being disabled.
if (Name == "avx512bw" && !Enabled)
  Features["avx512vbmi"] = Features["avx512vbmi2"] = Features["avx512bf16"] =
      Features["avx512bitalg"] = false;
- if (Name == "avx512vl" && !Enabled)
- Features["avx512bf16"] = false;
} else if (Name == "fma") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
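The net effect on feature implications: enabling avx512bf16 still pulls in avx512bw, but no longer forces avx512vl, and disabling avx512vl no longer switches avx512bf16 off; the 128/256-bit builtins remain gated by their own "avx512bf16,avx512vl" strings. A small compile-time guard mirroring the macro test at the end of this patch:

    #if defined(__AVX512BF16__) && !defined(__AVX512VL__)
    /* 512-bit BF16 conversions are available; the 128/256-bit forms are not. */
    #endif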
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+ IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
+ break;
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+ IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
+ break;
+ }
+ Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
+ return EmitX86Select(*this, Ops[2], Res, Ops[1]);
+ }
+
case X86::BI__emul:
case X86::BI__emulu: {
llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
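For the new cases above, EmitX86Select bitcasts the scalar mask to a vector of i1 and selects between the unmasked intrinsic result and the passthrough operand. A rough FileCheck-style sketch of the IR expected for the 512-bit builtin, assuming the usual mask lowering (not copied from this patch's tests):

    // CHECK: call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %{{.*}})
    // CHECK: bitcast i16 %{{.*}} to <16 x i1>
    // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}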
/// \returns A 256-bit vector of [16 x bfloat] resulting from the conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_cvtneps_pbh(__m512 __A) {
- return (__m256bh)__builtin_ia32_cvtneps2bf16_512((__v16sf) __A);
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A,
+ (__v16hi)_mm256_undefined_si256(),
+ (__mmask16)-1);
}
/// Convert Packed Single Data to Packed BF16 Data.
/// \returns A 256-bit vector of [16 x bfloat] resulting from the conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_mask_cvtneps_pbh(__m256bh __W, __mmask16 __U, __m512 __A) {
- return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm512_cvtneps_pbh(__A),
- (__v16hi)__W);
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A,
+ (__v16hi)__W,
+ (__mmask16)__U);
}
/// Convert Packed Single Data to Packed BF16 Data.
/// \returns A 256-bit vector of [16 x bfloat] resulting from the conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtneps_pbh(__mmask16 __U, __m512 __A) {
- return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm512_cvtneps_pbh(__A),
- (__v16hi)_mm256_setzero_si256());
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A,
+ (__v16hi)_mm256_setzero_si256(),
+ (__mmask16)__U);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
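A minimal usage sketch of the rewritten 512-bit conversions (assumes <immintrin.h> and -mavx512bf16; the function and variable names are illustrative):

    #include <immintrin.h>

    __m256bh demo512(__m512 a, __m256bh w, __mmask16 u) {
      __m256bh merged = _mm512_mask_cvtneps_pbh(w, u, a); /* lanes with u==0 keep w */
      (void)merged;
      return _mm512_maskz_cvtneps_pbh(u, a); /* lanes with u==0 are zeroed */
    }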
/// \returns A 128-bit vector of [8 x bfloat] resulting from the conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_cvtneps_pbh(__m256 __A) {
- return (__m128bh)__builtin_ia32_cvtneps2bf16_256((__v8sf)__A);
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8)-1);
}
/// Convert Packed Single Data to Packed BF16 Data.
/// \returns A 128-bit vector of [8 x bfloat] resulting from the conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m256 __A) {
- return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U,
- (__v8hi)_mm256_cvtneps_pbh(__A),
- (__v8hi)__W);
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
+ (__v8hi)__W,
+ (__mmask8)__U);
}
/// Convert Packed Single Data to Packed BF16 Data.
/// \returns A 128-bit vector of [8 x bfloat] resulting from the conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtneps_pbh(__mmask8 __U, __m256 __A) {
- return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U,
- (__v8hi)_mm256_cvtneps_pbh(__A),
- (__v8hi)_mm_setzero_si128());
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
+ (__v8hi)_mm_setzero_si128(),
+ (__mmask8)__U);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
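The 128/256-bit variants follow the same pattern but, per the builtin table above, still require avx512vl alongside avx512bf16; a brief sketch with illustrative names:

    __m128bh demo256(__m256 a, __m128bh w, __mmask8 u) {
      return _mm256_mask_cvtneps_pbh(w, u, a); /* merge-masked 256-to-128 conversion */
    }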
// AVX512BF16: #define __AVX512BF16__ 1
// AVX512BF16: #define __AVX512BW__ 1
-// AVX512BF16: #define __AVX512VL__ 1
+// AVX512BF16-NOT: #define __AVX512VL__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512BW %s
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512VL %s
-// AVX512BF16_NOAVX512VL-NOT: #define __AVX512BF16__ 1
+// AVX512BF16_NOAVX512VL: #define __AVX512BF16__ 1