From 7c9c5922e0c09a794cbcdb9437915456399e6a63 Mon Sep 17 00:00:00 2001 From: Pengfei Wang Date: Fri, 31 May 2019 06:09:35 +0000 Subject: [PATCH] [X86] Add VP2INTERSECT instructions Support intel AVX512 VP2INTERSECT instructions in clang Patch by Xiang Zhang (xiangzhangllvm) Differential Revision: https://reviews.llvm.org/D62367 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@362196 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ClangCommandLineReference.rst | 2 + include/clang/Basic/BuiltinsX86.def | 6 ++ include/clang/Driver/Options.td | 2 + lib/Basic/Targets/X86.cpp | 8 +- lib/Basic/Targets/X86.h | 1 + lib/CodeGen/CGBuiltin.cpp | 42 ++++++++ lib/Headers/CMakeLists.txt | 2 + lib/Headers/avx512vlvp2intersectintrin.h | 121 ++++++++++++++++++++++ lib/Headers/avx512vp2intersectintrin.h | 77 ++++++++++++++ lib/Headers/immintrin.h | 10 ++ test/CodeGen/attr-target-x86.c | 4 +- test/CodeGen/intel-avx512vlvp2intersect.c | 36 +++++++ test/CodeGen/intel-avx512vp2intersect.c | 20 ++++ test/Driver/x86-target-features.c | 5 + test/Preprocessor/x86_target_features.c | 10 ++ 15 files changed, 343 insertions(+), 3 deletions(-) create mode 100644 lib/Headers/avx512vlvp2intersectintrin.h create mode 100644 lib/Headers/avx512vp2intersectintrin.h create mode 100644 test/CodeGen/intel-avx512vlvp2intersect.c create mode 100644 test/CodeGen/intel-avx512vp2intersect.c diff --git a/docs/ClangCommandLineReference.rst b/docs/ClangCommandLineReference.rst index ddba3b10b7..ee58c2d30b 100644 --- a/docs/ClangCommandLineReference.rst +++ b/docs/ClangCommandLineReference.rst @@ -2639,6 +2639,8 @@ X86 .. option:: -mavx512vnni, -mno-avx512vnni +.. option:: -mavx512vp2intersect, -mno-avx512vp2intersect + .. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq .. 
option:: -mbmi, -mno-bmi diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 7db66c7b14..47f79b90fc 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1840,6 +1840,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512_mask, "V16sV16fV16sUs", "ncV:512: TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:", "avx512bf16,avx512vl") TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:", "avx512bf16,avx512vl") TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:", "avx512bf16") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_512, "vV8LLiV8LLiUc*Uc*", "nV:512:", "avx512vp2intersect") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_256, "vV4LLiV4LLiUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_128, "vV2LLiV2LLiUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_512, "vV16iV16iUs*Us*", "nV:512:", "avx512vp2intersect") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl") // generic select intrinsics TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl") diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index 68f415fb31..b86d39261e 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -2894,6 +2894,8 @@ def mavx512vnni : Flag<["-"], "mavx512vnni">, Group; def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group; def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group; def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group; +def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group; +def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group; 
def madx : Flag<["-"], "madx">, Group; def mno_adx : Flag<["-"], "mno-adx">, Group; def maes : Flag<["-"], "maes">, Group; diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp index 7bef7ce9c6..02e6ed2db6 100644 --- a/lib/Basic/Targets/X86.cpp +++ b/lib/Basic/Targets/X86.cpp @@ -524,6 +524,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap &Features, Features["avx512ifma"] = Features["avx512vpopcntdq"] = false; Features["avx512bitalg"] = Features["avx512vnni"] = false; Features["avx512vbmi2"] = Features["avx512bf16"] = false; + Features["avx512vp2intersect"] = false; break; } } @@ -774,6 +775,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasAVX512VBMI2 = true; } else if (Feature == "+avx512ifma") { HasAVX512IFMA = true; + } else if (Feature == "+avx512vp2intersect") { + HasAVX512VP2INTERSECT = true; } else if (Feature == "+sha") { HasSHA = true; } else if (Feature == "+mpx") { @@ -1166,7 +1169,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX512VBMI2__"); if (HasAVX512IFMA) Builder.defineMacro("__AVX512IFMA__"); - + if (HasAVX512VP2INTERSECT) + Builder.defineMacro("__AVX512VP2INTERSECT__"); if (HasSHA) Builder.defineMacro("__SHA__"); @@ -1322,6 +1326,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx512vbmi", true) .Case("avx512vbmi2", true) .Case("avx512ifma", true) + .Case("avx512vp2intersect", true) .Case("bmi", true) .Case("bmi2", true) .Case("cldemote", true) @@ -1401,6 +1406,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512vbmi", HasAVX512VBMI) .Case("avx512vbmi2", HasAVX512VBMI2) .Case("avx512ifma", HasAVX512IFMA) + .Case("avx512vp2intersect", HasAVX512VP2INTERSECT) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) .Case("cldemote", HasCLDEMOTE) diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h index d7a87f8d7c..d84425d53d 100644 --- a/lib/Basic/Targets/X86.h +++ b/lib/Basic/Targets/X86.h @@ -78,6 +78,7 @@ class 
LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAVX512VBMI = false; bool HasAVX512VBMI2 = false; bool HasAVX512IFMA = false; + bool HasAVX512VP2INTERSECT = false; bool HasSHA = false; bool HasMPX = false; bool HasSHSTK = false; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index bc798cab11..9fe2b315ed 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -11710,6 +11710,48 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); } + case X86::BI__builtin_ia32_vp2intersect_q_512: + case X86::BI__builtin_ia32_vp2intersect_q_256: + case X86::BI__builtin_ia32_vp2intersect_q_128: + case X86::BI__builtin_ia32_vp2intersect_d_512: + case X86::BI__builtin_ia32_vp2intersect_d_256: + case X86::BI__builtin_ia32_vp2intersect_d_128: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Intrinsic::ID ID; + + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_vp2intersect_q_512: + ID = Intrinsic::x86_avx512_vp2intersect_q_512; + break; + case X86::BI__builtin_ia32_vp2intersect_q_256: + ID = Intrinsic::x86_avx512_vp2intersect_q_256; + break; + case X86::BI__builtin_ia32_vp2intersect_q_128: + ID = Intrinsic::x86_avx512_vp2intersect_q_128; + break; + case X86::BI__builtin_ia32_vp2intersect_d_512: + ID = Intrinsic::x86_avx512_vp2intersect_d_512; + break; + case X86::BI__builtin_ia32_vp2intersect_d_256: + ID = Intrinsic::x86_avx512_vp2intersect_d_256; + break; + case X86::BI__builtin_ia32_vp2intersect_d_128: + ID = Intrinsic::x86_avx512_vp2intersect_d_128; + break; + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); + Value *Result = Builder.CreateExtractValue(Call, 0); + Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); + Value *Store = Builder.CreateDefaultAlignedStore(Result, Ops[2]); + + Result = Builder.CreateExtractValue(Call, 
1); + Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); + Store = Builder.CreateDefaultAlignedStore(Result, Ops[3]); + return Store; + } + case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: { diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index f7a3e5410c..dda76ed4e0 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -27,6 +27,8 @@ set(files avx512vlcdintrin.h avx512vldqintrin.h avx512vlintrin.h + avx512vp2intersectintrin.h + avx512vlvp2intersectintrin.h avx512vpopcntdqvlintrin.h avx512vnniintrin.h avx512vlvnniintrin.h diff --git a/lib/Headers/avx512vlvp2intersectintrin.h b/lib/Headers/avx512vlvp2intersectintrin.h new file mode 100644 index 0000000000..3e0815e5d4 --- /dev/null +++ b/lib/Headers/avx512vlvp2intersectintrin.h @@ -0,0 +1,121 @@ +/*===------ avx512vlvp2intersectintrin.h - VL VP2INTERSECT intrinsics ------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvp2intersectintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX512VLVP2INTERSECT_H +#define _AVX512VLVP2INTERSECT_H + +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \ + __min_vector_width__(128))) + +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \ + __min_vector_width__(256))) +/// Store, in an even/odd pair of mask registers, the indicators of the +/// locations of value matches between dwords in operands __a and __b. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the VP2INTERSECTD instruction. +/// +/// \param __a +/// A 256-bit vector of [8 x i32]. +/// \param __b +/// A 256-bit vector of [8 x i32]. +/// \param __m0 +/// A pointer to an 8-bit mask. +/// \param __m1 +/// A pointer to an 8-bit mask. +static __inline__ void __DEFAULT_FN_ATTRS256 +_mm256_2intersect_epi32(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) { + __builtin_ia32_vp2intersect_d_256((__v8si)__a, (__v8si)__b, __m0, __m1); +} + +/// Store, in an even/odd pair of mask registers, the indicators of the +/// locations of value matches between quadwords in operands __a and __b. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the VP2INTERSECTQ instruction. +/// +/// \param __a +/// A 256-bit vector of [4 x i64].
+/// \param __b +/// A 256-bit vector of [4 x i64]. +/// \param __m0 +/// A pointer to an 8-bit mask. +/// \param __m1 +/// A pointer to an 8-bit mask. +static __inline__ void __DEFAULT_FN_ATTRS256 +_mm256_2intersect_epi64(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) { + __builtin_ia32_vp2intersect_q_256((__v4di)__a, (__v4di)__b, __m0, __m1); +} + +/// Store, in an even/odd pair of mask registers, the indicators of the +/// locations of value matches between dwords in operands __a and __b. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the VP2INTERSECTD instruction. +/// +/// \param __a +/// A 128-bit vector of [4 x i32]. +/// \param __b +/// A 128-bit vector of [4 x i32]. +/// \param __m0 +/// A pointer to an 8-bit mask. +/// \param __m1 +/// A pointer to an 8-bit mask. +static __inline__ void __DEFAULT_FN_ATTRS128 +_mm_2intersect_epi32(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) { + __builtin_ia32_vp2intersect_d_128((__v4si)__a, (__v4si)__b, __m0, __m1); +} + +/// Store, in an even/odd pair of mask registers, the indicators of the +/// locations of value matches between quadwords in operands __a and __b. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the VP2INTERSECTQ instruction. +/// +/// \param __a +/// A 128-bit vector of [2 x i64].
+/// \param __b +/// A 128-bit vector of [2 x i64]. +/// \param __m0 +/// A pointer to an 8-bit mask. +/// \param __m1 +/// A pointer to an 8-bit mask. +static __inline__ void __DEFAULT_FN_ATTRS128 +_mm_2intersect_epi64(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) { + __builtin_ia32_vp2intersect_q_128((__v2di)__a, (__v2di)__b, __m0, __m1); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif diff --git a/lib/Headers/avx512vp2intersectintrin.h b/lib/Headers/avx512vp2intersectintrin.h new file mode 100644 index 0000000000..5d3cb48cfd --- /dev/null +++ b/lib/Headers/avx512vp2intersectintrin.h @@ -0,0 +1,77 @@ +/*===------- avx512vp2intersectintrin.h - VP2INTERSECT intrinsics -----------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
+#endif + +#ifndef _AVX512VP2INTERSECT_H +#define _AVX512VP2INTERSECT_H + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \ + __min_vector_width__(512))) + +/// Store, in an even/odd pair of mask registers, the indicators of the +/// locations of value matches between dwords in operands __a and __b. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the VP2INTERSECTD instruction. +/// +/// \param __a +/// A 512-bit vector of [16 x i32]. +/// \param __b +/// A 512-bit vector of [16 x i32]. +/// \param __m0 +/// A pointer to a 16-bit mask. +/// \param __m1 +/// A pointer to a 16-bit mask. +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_2intersect_epi32(__m512i __a, __m512i __b, __mmask16 *__m0, __mmask16 *__m1) { + __builtin_ia32_vp2intersect_d_512((__v16si)__a, (__v16si)__b, __m0, __m1); +} + +/// Store, in an even/odd pair of mask registers, the indicators of the +/// locations of value matches between quadwords in operands __a and __b. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the VP2INTERSECTQ instruction. +/// +/// \param __a +/// A 512-bit vector of [8 x i64].
+/// \param __b +/// A 512-bit vector of [8 x i64]. +/// \param __m0 +/// A pointer to an 8-bit mask. +/// \param __m1 +/// A pointer to an 8-bit mask. +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_2intersect_epi64(__m512i __a, __m512i __b, __mmask8 *__m0, __mmask8 *__m1) { + __builtin_ia32_vp2intersect_q_512((__v8di)__a, (__v8di)__b, __m0, __m1); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index ea009bd88b..73245f3858 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -421,6 +421,16 @@ _storebe_i64(void * __P, long long __D) { #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || \ + defined(__AVX512VP2INTERSECT__) +#include <avx512vp2intersectintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) +#include <avx512vlvp2intersectintrin.h> +#endif + #if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c index e3a2cb2e16..73486e5ee1 100644 --- a/test/CodeGen/attr-target-x86.c +++ b/test/CodeGen/attr-target-x86.c @@ -50,9 +50,9 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) { // CHECK: use_before_def{{.*}} #7 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686"
"target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" diff --git a/test/CodeGen/intel-avx512vlvp2intersect.c b/test/CodeGen/intel-avx512vlvp2intersect.c new file mode 100644 index 
0000000000..c607a69969 --- /dev/null +++ b/test/CodeGen/intel-avx512vlvp2intersect.c @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { +// CHECK-LABEL: test_mm256_2intersect_epi32 +// CHECK: call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) +// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0 +// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1 + _mm256_2intersect_epi32(a, b, m0, m1); +} + +void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { +// CHECK-LABEL: test_mm256_2intersect_epi64 +// CHECK: call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) +// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 +// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + _mm256_2intersect_epi64(a, b, m0, m1); +} + +void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { +// CHECK-LABEL: test_mm_2intersect_epi32 +// CHECK: call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) +// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 +// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + _mm_2intersect_epi32(a, b, m0, m1); +} + +void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { +// CHECK-LABEL: test_mm_2intersect_epi64 +// CHECK: call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) +// CHECK: extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 +// CHECK: extractvalue { <2 
x i1>, <2 x i1> } %{{.*}}, 1 + _mm_2intersect_epi64(a, b, m0, m1); +} diff --git a/test/CodeGen/intel-avx512vp2intersect.c b/test/CodeGen/intel-avx512vp2intersect.c new file mode 100644 index 0000000000..bcbf6076ee --- /dev/null +++ b/test/CodeGen/intel-avx512vp2intersect.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, __mmask16 *m1) { +// CHECK-LABEL: test_mm512_2intersect_epi32 +// CHECK: call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) +// CHECK: extractvalue { <16 x i1>, <16 x i1> } %{{.*}}, 0 +// CHECK: extractvalue { <16 x i1>, <16 x i1> } %{{.*}}, 1 + _mm512_2intersect_epi32(a, b, m0, m1); +} + +void test_mm512_2intersect_epi64(__m512i a, __m512i b, __mmask8 *m0, __mmask8 *m1) { +// CHECK-LABEL: test_mm512_2intersect_epi64 +// CHECK: call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) +// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0 +// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1 + _mm512_2intersect_epi64(a, b, m0, m1); +} diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c index d925f6824c..5866d38648 100644 --- a/test/Driver/x86-target-features.c +++ b/test/Driver/x86-target-features.c @@ -125,6 +125,11 @@ // VBMI2: "-target-feature" "+avx512vbmi2" // NO-VBMI2: "-target-feature" "-avx512vbmi2" +// RUN: %clang -target i386-linux-gnu -mavx512vp2intersect %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VP2INTERSECT %s +// RUN: %clang -target i386-linux-gnu -mno-avx512vp2intersect %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VP2INTERSECT %s +// 
VP2INTERSECT: "-target-feature" "+avx512vp2intersect" +// NO-VP2INTERSECT: "-target-feature" "-avx512vp2intersect" + // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mrdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RDPID %s // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-rdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RDPID %s // RDPID: "-target-feature" "+rdpid" diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c index fa815dc61e..2dfca34121 100644 --- a/test/Preprocessor/x86_target_features.c +++ b/test/Preprocessor/x86_target_features.c @@ -458,3 +458,13 @@ // AVX512BF16_NOAVX512VL: #define __AVX512BF16__ 1 +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vp2intersect -x c -E -dM -o - %s | FileCheck -check-prefix=VP2INTERSECT %s + +// VP2INTERSECT: #define __AVX512F__ 1 +// VP2INTERSECT: #define __AVX512VP2INTERSECT__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vp2intersect -x c -E -dM -o - %s | FileCheck -check-prefix=NOVP2INTERSECT %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vp2intersect -mno-avx512f -x c -E -dM -o - %s | FileCheck -check-prefix=NOVP2INTERSECT %s + +// NOVP2INTERSECT-NOT: #define __AVX512VP2INTERSECT__ 1 + -- 2.40.0