]> granicus.if.org Git - clang/commitdiff
[X86] Adding avx512_vpopcntdq feature set and its intrinsics
authorOren Ben Simhon <oren.ben.simhon@intel.com>
Thu, 25 May 2017 13:44:11 +0000 (13:44 +0000)
committerOren Ben Simhon <oren.ben.simhon@intel.com>
Thu, 25 May 2017 13:44:11 +0000 (13:44 +0000)
AVX512_VPOPCNTDQ is a new feature set that was published by Intel.
The patch represents the Clang side of the addition of six intrinsics for two new machine instructions (vpopcntd and vpopcntq).
It also includes the addition of the new feature set.

Differential Revision: https://reviews.llvm.org/D33170

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@303857 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
include/clang/Driver/Options.td
lib/Basic/Targets.cpp
lib/CodeGen/CGBuiltin.cpp
lib/Headers/CMakeLists.txt
lib/Headers/avx512vpopcntdqintrin.h [new file with mode: 0644]
lib/Headers/immintrin.h
test/CodeGen/attr-target-x86.c
test/CodeGen/avx512vpopcntdqintrin.c [new file with mode: 0644]

index 68b868ce8e6eddab838f8d026d5f97d01840e5c0..a98c8f0a53db9c9e8e4a3d32067d5ec4de96d798 100644 (file)
@@ -1107,6 +1107,9 @@ TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "", "avx5
 TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd")
 TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd")
 
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
+
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
index 4f1ea08dfe9fa49a6cea9c55a1cfe393e4bd626e..2de4a2d1b419666dacdc19462d7fd4224eb397c1 100644 (file)
@@ -1741,6 +1741,7 @@ def mno_avx : Flag<["-"], "mno-avx">, Group<m_x86_Features_Group>;
 def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
 def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
 def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
+def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
 def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
 def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
 def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
@@ -1941,6 +1942,7 @@ def mavx : Flag<["-"], "mavx">, Group<m_x86_Features_Group>;
 def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
 def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
 def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
+def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
 def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
 def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
 def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
index 2d013ab5b13d627fd373f423ec6e7b7d157b7dc9..b35033876da1dc21fc4d32c116920304356a6354 100644 (file)
@@ -2624,6 +2624,7 @@ class X86TargetInfo : public TargetInfo {
   bool HasFMA = false;
   bool HasF16C = false;
   bool HasAVX512CD = false;
+  bool HasAVX512VPOPCNTDQ = false;
   bool HasAVX512ER = false;
   bool HasAVX512PF = false;
   bool HasAVX512DQ = false;
@@ -3504,9 +3505,9 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
     LLVM_FALLTHROUGH;
   case AVX512F:
     Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
-      Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
-      Features["avx512vl"] = Features["avx512vbmi"] =
-      Features["avx512ifma"] = false;
+        Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
+            Features["avx512vl"] = Features["avx512vbmi"] =
+                Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
   }
 }
 
@@ -3616,7 +3617,8 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
     setSSELevel(Features, AVX512F, Enabled);
   } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
              Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
-             Name == "avx512vbmi" || Name == "avx512ifma") {
+             Name == "avx512vbmi" || Name == "avx512ifma" ||
+             Name == "avx512vpopcntdq") {
     if (Enabled)
       setSSELevel(Features, AVX512F, Enabled);
     // Enable BWI instruction if VBMI is being enabled.
@@ -3700,6 +3702,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasF16C = true;
     } else if (Feature == "+avx512cd") {
       HasAVX512CD = true;
+    } else if (Feature == "+avx512vpopcntdq") {
+      HasAVX512VPOPCNTDQ = true;
     } else if (Feature == "+avx512er") {
       HasAVX512ER = true;
     } else if (Feature == "+avx512pf") {
@@ -4037,6 +4041,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
 
   if (HasAVX512CD)
     Builder.defineMacro("__AVX512CD__");
+  if (HasAVX512VPOPCNTDQ)
+    Builder.defineMacro("__AVX512VPOPCNTDQ__");
   if (HasAVX512ER)
     Builder.defineMacro("__AVX512ER__");
   if (HasAVX512PF)
@@ -4168,6 +4174,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx2", SSELevel >= AVX2)
       .Case("avx512f", SSELevel >= AVX512F)
       .Case("avx512cd", HasAVX512CD)
+      .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
       .Case("avx512er", HasAVX512ER)
       .Case("avx512pf", HasAVX512PF)
       .Case("avx512dq", HasAVX512DQ)
@@ -4253,6 +4260,7 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
       .Case("avx512bw", true)
       .Case("avx512dq", true)
       .Case("avx512cd", true)
+      .Case("avx512vpopcntdq", true)
       .Case("avx512er", true)
       .Case("avx512pf", true)
       .Case("avx512vbmi", true)
index f3a04a6a4e1a845956644b8d699e44db8bc1378c..2134fb9e03e4dbeb3e6824bf56f76f42c8654567 100644 (file)
@@ -7332,39 +7332,42 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       AVX512PF,
       AVX512VBMI,
       AVX512IFMA,
+      AVX512VPOPCNTDQ,
       MAX
     };
 
-    X86Features Feature = StringSwitch<X86Features>(FeatureStr)
-                              .Case("cmov", X86Features::CMOV)
-                              .Case("mmx", X86Features::MMX)
-                              .Case("popcnt", X86Features::POPCNT)
-                              .Case("sse", X86Features::SSE)
-                              .Case("sse2", X86Features::SSE2)
-                              .Case("sse3", X86Features::SSE3)
-                              .Case("ssse3", X86Features::SSSE3)
-                              .Case("sse4.1", X86Features::SSE4_1)
-                              .Case("sse4.2", X86Features::SSE4_2)
-                              .Case("avx", X86Features::AVX)
-                              .Case("avx2", X86Features::AVX2)
-                              .Case("sse4a", X86Features::SSE4_A)
-                              .Case("fma4", X86Features::FMA4)
-                              .Case("xop", X86Features::XOP)
-                              .Case("fma", X86Features::FMA)
-                              .Case("avx512f", X86Features::AVX512F)
-                              .Case("bmi", X86Features::BMI)
-                              .Case("bmi2", X86Features::BMI2)
-                              .Case("aes", X86Features::AES)
-                              .Case("pclmul", X86Features::PCLMUL)
-                              .Case("avx512vl", X86Features::AVX512VL)
-                              .Case("avx512bw", X86Features::AVX512BW)
-                              .Case("avx512dq", X86Features::AVX512DQ)
-                              .Case("avx512cd", X86Features::AVX512CD)
-                              .Case("avx512er", X86Features::AVX512ER)
-                              .Case("avx512pf", X86Features::AVX512PF)
-                              .Case("avx512vbmi", X86Features::AVX512VBMI)
-                              .Case("avx512ifma", X86Features::AVX512IFMA)
-                              .Default(X86Features::MAX);
+    X86Features Feature =
+        StringSwitch<X86Features>(FeatureStr)
+            .Case("cmov", X86Features::CMOV)
+            .Case("mmx", X86Features::MMX)
+            .Case("popcnt", X86Features::POPCNT)
+            .Case("sse", X86Features::SSE)
+            .Case("sse2", X86Features::SSE2)
+            .Case("sse3", X86Features::SSE3)
+            .Case("ssse3", X86Features::SSSE3)
+            .Case("sse4.1", X86Features::SSE4_1)
+            .Case("sse4.2", X86Features::SSE4_2)
+            .Case("avx", X86Features::AVX)
+            .Case("avx2", X86Features::AVX2)
+            .Case("sse4a", X86Features::SSE4_A)
+            .Case("fma4", X86Features::FMA4)
+            .Case("xop", X86Features::XOP)
+            .Case("fma", X86Features::FMA)
+            .Case("avx512f", X86Features::AVX512F)
+            .Case("bmi", X86Features::BMI)
+            .Case("bmi2", X86Features::BMI2)
+            .Case("aes", X86Features::AES)
+            .Case("pclmul", X86Features::PCLMUL)
+            .Case("avx512vl", X86Features::AVX512VL)
+            .Case("avx512bw", X86Features::AVX512BW)
+            .Case("avx512dq", X86Features::AVX512DQ)
+            .Case("avx512cd", X86Features::AVX512CD)
+            .Case("avx512er", X86Features::AVX512ER)
+            .Case("avx512pf", X86Features::AVX512PF)
+            .Case("avx512vbmi", X86Features::AVX512VBMI)
+            .Case("avx512ifma", X86Features::AVX512IFMA)
+            .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
+            .Default(X86Features::MAX);
     assert(Feature != X86Features::MAX && "Invalid feature!");
 
     // Matching the struct layout from the compiler-rt/libgcc structure that is
@@ -7517,7 +7520,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_storesd128_mask: {
     return EmitX86MaskedStore(*this, Ops, 16);
   }
-
+  case X86::BI__builtin_ia32_vpopcntd_512:
+  case X86::BI__builtin_ia32_vpopcntq_512: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
+    return Builder.CreateCall(F, Ops);
+  }
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
index 6091db08a93bec69c8ada5cf98fa5b2761aa262b..a621c02644e3800defe27be01e24d5fa7a07213c 100644 (file)
@@ -7,6 +7,7 @@ set(files
   avx2intrin.h
   avx512bwintrin.h
   avx512cdintrin.h
+  avx512vpopcntdqintrin.h
   avx512dqintrin.h
   avx512erintrin.h
   avx512fintrin.h
diff --git a/lib/Headers/avx512vpopcntdqintrin.h b/lib/Headers/avx512vpopcntdqintrin.h
new file mode 100644 (file)
index 0000000..34ab849
--- /dev/null
@@ -0,0 +1,70 @@
+/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
+ *------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx512vpopcntdqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VPOPCNTDQINTRIN_H
+#define __AVX512VPOPCNTDQINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd"   \
+                                                            "q")))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
+  return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__U, (__v8di)_mm512_popcnt_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
+  return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
+  return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_popcnt_epi32(__A), (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
+  return _mm512_mask_popcnt_epi32((__m512i)_mm512_setzero_si512(), __U, __A);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
index 7f91d49fbcec0126cf9dd3a7862a62be33fd7977..e22dd231427dfc2032981fa464bed2470865edcb 100644 (file)
@@ -146,6 +146,10 @@ _mm256_cvtph_ps(__m128i __a)
 #include <avx512cdintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
+#include <avx512vpopcntdqintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
 #include <avx512dqintrin.h>
 #endif
index 7557ec7acdd507609eafd4f94b52aa32fb0574e3..f2777679aec1339adc82ff967e3a8cd2070699e6 100644 (file)
@@ -36,7 +36,7 @@ int __attribute__((target("arch=lakemont"))) lake(int a) { return 4; }
 // CHECK: lake{{.*}} #6
 // CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
 // CHECK: #3 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
 // CHECK: #4 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes"
 // CHECK: #5 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+sse,+sse2,+x87,-3dnow,-3dnowa,-mmx"
diff --git a/test/CodeGen/avx512vpopcntdqintrin.c b/test/CodeGen/avx512vpopcntdqintrin.c
new file mode 100644 (file)
index 0000000..e7c797c
--- /dev/null
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_popcnt_epi64(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_popcnt_epi64
+  // CHECK: @llvm.ctpop.v8i64
+  return _mm512_popcnt_epi64(__A);
+}
+__m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_popcnt_epi64
+  // CHECK: @llvm.ctpop.v8i64
+  // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}}
+  return _mm512_mask_popcnt_epi64(__W, __U, __A);
+}
+__m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_popcnt_epi64
+  // CHECK: @llvm.ctpop.v8i64
+  // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}}
+  return _mm512_maskz_popcnt_epi64(__U, __A);
+}
+__m512i test_mm512_popcnt_epi32(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_popcnt_epi32
+  // CHECK: @llvm.ctpop.v16i32
+  return _mm512_popcnt_epi32(__A);
+}
+__m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_popcnt_epi32
+  // CHECK: @llvm.ctpop.v16i32
+  // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}}
+  return _mm512_mask_popcnt_epi32(__W, __U, __A);
+}
+__m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_popcnt_epi32
+  // CHECK: @llvm.ctpop.v16i32
+  // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}}
+  return _mm512_maskz_popcnt_epi32(__U, __A);
+}