TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd")
TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
+
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
+def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
+def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
bool HasFMA = false;
bool HasF16C = false;
bool HasAVX512CD = false;
+ bool HasAVX512VPOPCNTDQ = false;
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
LLVM_FALLTHROUGH;
case AVX512F:
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
- Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
- Features["avx512vl"] = Features["avx512vbmi"] =
- Features["avx512ifma"] = false;
+ Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
+ Features["avx512vl"] = Features["avx512vbmi"] =
+ Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
}
}
setSSELevel(Features, AVX512F, Enabled);
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
- Name == "avx512vbmi" || Name == "avx512ifma") {
+ Name == "avx512vbmi" || Name == "avx512ifma" ||
+ Name == "avx512vpopcntdq") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
// Enable BWI instruction if VBMI is being enabled.
HasF16C = true;
} else if (Feature == "+avx512cd") {
HasAVX512CD = true;
+ } else if (Feature == "+avx512vpopcntdq") {
+ HasAVX512VPOPCNTDQ = true;
} else if (Feature == "+avx512er") {
HasAVX512ER = true;
} else if (Feature == "+avx512pf") {
if (HasAVX512CD)
Builder.defineMacro("__AVX512CD__");
+ if (HasAVX512VPOPCNTDQ)
+ Builder.defineMacro("__AVX512VPOPCNTDQ__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
if (HasAVX512PF)
.Case("avx2", SSELevel >= AVX2)
.Case("avx512f", SSELevel >= AVX512F)
.Case("avx512cd", HasAVX512CD)
+ .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bw", true)
.Case("avx512dq", true)
.Case("avx512cd", true)
+ .Case("avx512vpopcntdq", true)
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512vbmi", true)
AVX512PF,
AVX512VBMI,
AVX512IFMA,
+ AVX512VPOPCNTDQ,
MAX
};
- X86Features Feature = StringSwitch<X86Features>(FeatureStr)
- .Case("cmov", X86Features::CMOV)
- .Case("mmx", X86Features::MMX)
- .Case("popcnt", X86Features::POPCNT)
- .Case("sse", X86Features::SSE)
- .Case("sse2", X86Features::SSE2)
- .Case("sse3", X86Features::SSE3)
- .Case("ssse3", X86Features::SSSE3)
- .Case("sse4.1", X86Features::SSE4_1)
- .Case("sse4.2", X86Features::SSE4_2)
- .Case("avx", X86Features::AVX)
- .Case("avx2", X86Features::AVX2)
- .Case("sse4a", X86Features::SSE4_A)
- .Case("fma4", X86Features::FMA4)
- .Case("xop", X86Features::XOP)
- .Case("fma", X86Features::FMA)
- .Case("avx512f", X86Features::AVX512F)
- .Case("bmi", X86Features::BMI)
- .Case("bmi2", X86Features::BMI2)
- .Case("aes", X86Features::AES)
- .Case("pclmul", X86Features::PCLMUL)
- .Case("avx512vl", X86Features::AVX512VL)
- .Case("avx512bw", X86Features::AVX512BW)
- .Case("avx512dq", X86Features::AVX512DQ)
- .Case("avx512cd", X86Features::AVX512CD)
- .Case("avx512er", X86Features::AVX512ER)
- .Case("avx512pf", X86Features::AVX512PF)
- .Case("avx512vbmi", X86Features::AVX512VBMI)
- .Case("avx512ifma", X86Features::AVX512IFMA)
- .Default(X86Features::MAX);
+ X86Features Feature =
+ StringSwitch<X86Features>(FeatureStr)
+ .Case("cmov", X86Features::CMOV)
+ .Case("mmx", X86Features::MMX)
+ .Case("popcnt", X86Features::POPCNT)
+ .Case("sse", X86Features::SSE)
+ .Case("sse2", X86Features::SSE2)
+ .Case("sse3", X86Features::SSE3)
+ .Case("ssse3", X86Features::SSSE3)
+ .Case("sse4.1", X86Features::SSE4_1)
+ .Case("sse4.2", X86Features::SSE4_2)
+ .Case("avx", X86Features::AVX)
+ .Case("avx2", X86Features::AVX2)
+ .Case("sse4a", X86Features::SSE4_A)
+ .Case("fma4", X86Features::FMA4)
+ .Case("xop", X86Features::XOP)
+ .Case("fma", X86Features::FMA)
+ .Case("avx512f", X86Features::AVX512F)
+ .Case("bmi", X86Features::BMI)
+ .Case("bmi2", X86Features::BMI2)
+ .Case("aes", X86Features::AES)
+ .Case("pclmul", X86Features::PCLMUL)
+ .Case("avx512vl", X86Features::AVX512VL)
+ .Case("avx512bw", X86Features::AVX512BW)
+ .Case("avx512dq", X86Features::AVX512DQ)
+ .Case("avx512cd", X86Features::AVX512CD)
+ .Case("avx512er", X86Features::AVX512ER)
+ .Case("avx512pf", X86Features::AVX512PF)
+ .Case("avx512vbmi", X86Features::AVX512VBMI)
+ .Case("avx512ifma", X86Features::AVX512IFMA)
+ .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
+ .Default(X86Features::MAX);
assert(Feature != X86Features::MAX && "Invalid feature!");
// Matching the struct layout from the compiler-rt/libgcc structure that is
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
-
+ case X86::BI__builtin_ia32_vpopcntd_512:
+ case X86::BI__builtin_ia32_vpopcntq_512: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
+ return Builder.CreateCall(F, Ops);
+ }
case X86::BI__builtin_ia32_cvtmask2b128:
case X86::BI__builtin_ia32_cvtmask2b256:
case X86::BI__builtin_ia32_cvtmask2b512:
avx2intrin.h
avx512bwintrin.h
avx512cdintrin.h
+ avx512vpopcntdqintrin.h
avx512dqintrin.h
avx512erintrin.h
avx512fintrin.h
--- /dev/null
+/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
+ *------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx512vpopcntdqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VPOPCNTDQINTRIN_H
+#define __AVX512VPOPCNTDQINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \
+ "q")))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
+ return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ (__mmask8)__U, (__v8di)_mm512_popcnt_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
+ return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
+ return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_popcnt_epi32(__A), (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
+ return _mm512_mask_popcnt_epi32((__m512i)_mm512_setzero_si512(), __U, __A);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
#include <avx512cdintrin.h>
#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
+#include <avx512vpopcntdqintrin.h>
+#endif
+
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif
// CHECK: lake{{.*}} #6
// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
// CHECK: #4 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes"
// CHECK: #5 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+sse,+sse2,+x87,-3dnow,-3dnowa,-mmx"
--- /dev/null
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_popcnt_epi64(__m512i __A) {
+ // CHECK-LABEL: @test_mm512_popcnt_epi64
+ // CHECK: @llvm.ctpop.v8i64
+ return _mm512_popcnt_epi64(__A);
+}
+__m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+ // CHECK-LABEL: @test_mm512_mask_popcnt_epi64
+ // CHECK: @llvm.ctpop.v8i64
+ // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}}
+ return _mm512_mask_popcnt_epi64(__W, __U, __A);
+}
+__m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
+ // CHECK-LABEL: @test_mm512_maskz_popcnt_epi64
+ // CHECK: @llvm.ctpop.v8i64
+ // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}}
+ return _mm512_maskz_popcnt_epi64(__U, __A);
+}
+__m512i test_mm512_popcnt_epi32(__m512i __A) {
+ // CHECK-LABEL: @test_mm512_popcnt_epi32
+ // CHECK: @llvm.ctpop.v16i32
+ return _mm512_popcnt_epi32(__A);
+}
+__m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+ // CHECK-LABEL: @test_mm512_mask_popcnt_epi32
+ // CHECK: @llvm.ctpop.v16i32
+ // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}}
+ return _mm512_mask_popcnt_epi32(__W, __U, __A);
+}
+__m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
+ // CHECK-LABEL: @test_mm512_maskz_popcnt_epi32
+ // CHECK: @llvm.ctpop.v16i32
+ // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}}
+ return _mm512_maskz_popcnt_epi32(__U, __A);
+}