From cd1ee4494104517b80e3295a32fc1bfedbd6a1f6 Mon Sep 17 00:00:00 2001 From: Coby Tayree <coby.tayree@intel.com> Date: Wed, 27 Dec 2017 10:37:51 +0000 Subject: [PATCH] [x86][icelake][vnni] added vnni feature recognition added intrinsics support for VNNI instructions _mm256_mask_dpbusd_epi32 _mm256_maskz_dpbusd_epi32 _mm256_dpbusd_epi32 _mm256_mask_dpbusds_epi32 _mm256_maskz_dpbusds_epi32 _mm256_dpbusds_epi32 _mm256_mask_dpwssd_epi32 _mm256_maskz_dpwssd_epi32 _mm256_dpwssd_epi32 _mm256_mask_dpwssds_epi32 _mm256_maskz_dpwssds_epi32 _mm256_dpwssds_epi32 _mm128_mask_dpbusd_epi32 _mm128_maskz_dpbusd_epi32 _mm128_dpbusd_epi32 _mm128_mask_dpbusds_epi32 _mm128_maskz_dpbusds_epi32 _mm128_dpbusds_epi32 _mm128_mask_dpwssd_epi32 _mm128_maskz_dpwssd_epi32 _mm128_dpwssd_epi32 _mm128_mask_dpwssds_epi32 _mm128_maskz_dpwssds_epi32 _mm128_dpwssds_epi32 _mm512_mask_dpbusd_epi32 _mm512_maskz_dpbusd_epi32 _mm512_dpbusd_epi32 _mm512_mask_dpbusds_epi32 _mm512_maskz_dpbusds_epi32 _mm512_dpbusds_epi32 _mm512_mask_dpwssd_epi32 _mm512_maskz_dpwssd_epi32 _mm512_dpwssd_epi32 _mm512_mask_dpwssds_epi32 _mm512_maskz_dpwssds_epi32 _mm512_dpwssds_epi32 matching a similar work on the backend (D40208) Differential Revision: https://reviews.llvm.org/D41558 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@321484 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 25 ++ include/clang/Driver/Options.td | 2 + lib/Basic/Targets/X86.cpp | 12 +- lib/Basic/Targets/X86.h | 1 + lib/Headers/CMakeLists.txt | 2 + lib/Headers/avx512vlvnniintrin.h | 254 +++++++++++++++++++++ lib/Headers/avx512vnniintrin.h | 146 ++++++++++++ lib/Headers/immintrin.h | 9 + test/CodeGen/attr-target-x86.c | 4 +- test/CodeGen/avx512vlvnni-builtins.c | 148 ++++++++++++ test/CodeGen/avx512vnni-builtins.c | 76 ++++++ test/Driver/x86-target-features.c | 5 + test/Preprocessor/predefined-arch-macros.c | 2 + 13 files changed, 682 insertions(+), 4 deletions(-) create mode 100644 lib/Headers/avx512vlvnniintrin.h create mode 100644 lib/Headers/avx512vnniintrin.h create mode 100644 test/CodeGen/avx512vlvnni-builtins.c create mode 100644 test/CodeGen/avx512vnni-builtins.c diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 1039b3c0b3..dce835e0d3 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -979,6 +979,31 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") + TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi","","avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi","","avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi","","avx512vl") diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index d24eb736f0..aa09da3a47 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -2485,6 +2485,8 @@ def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>; def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>; def mavx512vl : Flag<["-"], "mavx512vl">, Group<m_x86_Features_Group>; def mno_avx512vl : Flag<["-"], "mno-avx512vl">, Group<m_x86_Features_Group>; +def mavx512vnni : Flag<["-"], "mavx512vnni">, Group<m_x86_Features_Group>; +def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group<m_x86_Features_Group>; def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>; def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>; def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>; diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp index e0ede257af..ac48bd610a 100644 --- a/lib/Basic/Targets/X86.cpp +++ b/lib/Basic/Targets/X86.cpp @@ -136,6 +136,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "gfni", true); setFeatureEnabledImpl(Features, "vpclmulqdq", true); setFeatureEnabledImpl(Features, "avx512bitalg", true); + setFeatureEnabledImpl(Features, "avx512vnni", true); // TODO: Add icelake features here. LLVM_FALLTHROUGH; case CK_Cannonlake: @@ -475,7 +476,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features, Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = Features["avx512vl"] = Features["avx512vbmi"] = Features["avx512ifma"] = Features["avx512vpopcntdq"] = - Features["avx512bitalg"] = false; + Features["avx512bitalg"] = Features["avx512vnni"] = false; break; } } @@ -606,7 +607,8 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features, } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" || Name == "avx512vbmi" || Name == "avx512ifma" || - Name == "avx512vpopcntdq" || Name == "avx512bitalg") { + Name == "avx512vpopcntdq" || Name == "avx512bitalg" || + Name == "avx512vnni") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); // Enable BWI instruction if VBMI / BITALG is being enabled. @@ -698,6 +700,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { HasAVX512VPOPCNTDQ = true; + } else if (Feature == "+avx512vnni") { + HasAVX512VNNI = true; } else if (Feature == "+avx512er") { HasAVX512ER = true; } else if (Feature == "+avx512pf") { @@ -1039,6 +1043,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) Builder.defineMacro("__AVX512VPOPCNTDQ__"); + if (HasAVX512VNNI) + Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) @@ -1182,6 +1188,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx512f", true) .Case("avx512cd", true) .Case("avx512vpopcntdq", true) + .Case("avx512vnni", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512dq", true) @@ -1248,6 +1255,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) + .Case("avx512vnni", HasAVX512VNNI) .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h index eb742c9e09..b1f6f6e156 100644 --- a/lib/Basic/Targets/X86.h +++ b/lib/Basic/Targets/X86.h @@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasF16C = false; bool HasAVX512CD = false; bool HasAVX512VPOPCNTDQ = false; + bool HasAVX512VNNI = false; bool HasAVX512ER = false; bool HasAVX512PF = false; bool HasAVX512DQ = false; diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index d3b577e309..e643cc491b 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -24,6 +24,8 @@ set(files avx512vldqintrin.h avx512vlintrin.h avx512vpopcntdqvlintrin.h + avx512vnniintrin.h + avx512vlvnniintrin.h avxintrin.h bmi2intrin.h bmiintrin.h diff --git a/lib/Headers/avx512vlvnniintrin.h b/lib/Headers/avx512vlvnniintrin.h new file mode 100644 index 0000000000..745ae8b7ad --- /dev/null +++ b/lib/Headers/avx512vlvnniintrin.h @@ -0,0 +1,254 @@ +/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLVNNIINTRIN_H +#define __AVX512VLVNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vnniintrin.h b/lib/Headers/avx512vnniintrin.h new file mode 100644 index 0000000000..0c6badd231 --- /dev/null +++ b/lib/Headers/avx512vnniintrin.h @@ -0,0 +1,146 @@ +/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VNNIINTRIN_H +#define __AVX512VNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"))) + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index 714398b36d..a436208d47 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -167,6 +167,15 @@ _mm256_cvtph_ps(__m128i __a) #include <avx512vpopcntdqvlintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) +#include <avx512vnniintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512VNNI__)) +#include <avx512vlvnniintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) #include <avx512dqintrin.h> #endif diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c index 2facfe0a0e..5809c82ccb 100644 --- a/test/CodeGen/attr-target-x86.c +++ b/test/CodeGen/attr-target-x86.c @@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; } // CHECK: lake{{.*}} #7 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx" diff --git a/test/CodeGen/avx512vlvnni-builtins.c b/test/CodeGen/avx512vlvnni-builtins.c new file mode 100644 index 0000000000..861b915fdb --- /dev/null +++ b/test/CodeGen/avx512vlvnni-builtins.c @@ -0,0 +1,148 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <immintrin.h> + +__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256 + return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.256 + return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256 + return _mm256_dpbusd_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256 + return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.256 + return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256 + return _mm256_dpbusds_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256 + return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.256 + return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256 + return _mm256_dpwssd_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256 + return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.256 + return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256 + return _mm256_dpwssds_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128 + return _mm128_mask_dpbusd_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.128 + return _mm128_maskz_dpbusd_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128 + return _mm128_dpbusd_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128 + return _mm128_mask_dpbusds_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.128 + return _mm128_maskz_dpbusds_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128 + return _mm128_dpbusds_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128 + return _mm128_mask_dpwssd_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.128 + return _mm128_maskz_dpwssd_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128 + return _mm128_dpwssd_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128 + return _mm128_mask_dpwssds_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.128 + return _mm128_maskz_dpwssds_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128 + return _mm128_dpwssds_epi32(__S, __A, __B); +} + diff --git a/test/CodeGen/avx512vnni-builtins.c b/test/CodeGen/avx512vnni-builtins.c new file mode 100644 index 0000000000..d79046aa04 --- /dev/null +++ b/test/CodeGen/avx512vnni-builtins.c @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <immintrin.h> + +__m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512 + return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.512 + return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512 + return _mm512_dpbusd_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512 + return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.512 + return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512 + return _mm512_dpbusds_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512 + return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.512 + return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512 + return _mm512_dpwssd_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512 + return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.512 + return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512 + return _mm512_dpwssds_epi32(__S, __A, __B); +} + diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c index 57d3265bd4..e72f1dab2e 100644 --- a/test/Driver/x86-target-features.c +++ b/test/Driver/x86-target-features.c @@ -115,3 +115,8 @@ // BITALG: "-target-feature" "+avx512bitalg" // NO-BITALG: "-target-feature" "-avx512bitalg" +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VNNI %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VNNI %s +// VNNI: "-target-feature" "+avx512vnni" +// NO-VNNI: "-target-feature" "-avx512vnni" + diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c index dc552c76b5..1fafddaa21 100644 --- a/test/Preprocessor/predefined-arch-macros.c +++ b/test/Preprocessor/predefined-arch-macros.c @@ -1058,6 +1058,7 @@ // CHECK_ICL_M32: #define __AVX512IFMA__ 1 // CHECK_ICL_M32: #define __AVX512VBMI__ 1 // CHECK_ICL_M32: #define __AVX512VL__ 1 +// CHECK_ICL_M32: #define __AVX512VNNI__ 1 // CHECK_ICL_M32: #define __AVX__ 1 // CHECK_ICL_M32: #define __BMI2__ 1 // CHECK_ICL_M32: #define __BMI__ 1 @@ -1107,6 +1108,7 @@ // CHECK_ICL_M64: #define __AVX512IFMA__ 1 // CHECK_ICL_M64: #define __AVX512VBMI__ 1 // CHECK_ICL_M64: #define __AVX512VL__ 1 +// CHECK_ICL_M64: #define __AVX512VNNI__ 1 // CHECK_ICL_M64: #define __AVX__ 1 // CHECK_ICL_M64: #define __BMI2__ 1 // CHECK_ICL_M64: #define __BMI__ 1 -- 2.40.0