]> granicus.if.org Git - clang/commitdiff
Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake
authorLuo, Yuanke <yuanke.luo@intel.com>
Mon, 6 May 2019 08:25:11 +0000 (08:25 +0000)
committerLuo, Yuanke <yuanke.luo@intel.com>
Mon, 6 May 2019 08:25:11 +0000 (08:25 +0000)
Summary:
1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake;
2. Enable intrinsics for VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
For more details about BF16 intrinsic, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference

Patch by LiuTianle

Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, spatel, RKSimon

Reviewed By: craig.topper

Subscribers: mgorny, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D60552

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@360018 91177308-0d34-0410-b5e6-96231b3b80d8

12 files changed:
docs/ClangCommandLineReference.rst
include/clang/Basic/BuiltinsX86.def
include/clang/Driver/Options.td
lib/Basic/Targets/X86.cpp
lib/Basic/Targets/X86.h
lib/CodeGen/CGBuiltin.cpp
lib/Headers/CMakeLists.txt
lib/Headers/cpuid.h
lib/Headers/immintrin.h
test/CodeGen/attr-target-x86.c
test/Driver/x86-target-features.c
test/Preprocessor/x86_target_features.c

index e852c3e3879855cf293180b1880c27c54bad1d9d..f153a0cd0724ac353902a25c09315aeb83e266ae 100644 (file)
@@ -2610,6 +2610,8 @@ X86
 
 .. option:: -mavx512bitalg, -mno-avx512bitalg
 
+.. option:: -mavx512bf16, -mno-avx512bf16
+
 .. option:: -mavx512bw, -mno-avx512bw
 
 .. option:: -mavx512cd, -mno-avx512cd
index 5cbab8a3b327202fe4aa477e91051612a7443da0..6eef6954f063e4029b503a0342380035df405f34 100644 (file)
@@ -1831,6 +1831,24 @@ TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:",
+                                                 "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:",
+                                                 "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:",
+                                                 "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8sV4fV8sUc", "ncV:128:",
+                                                "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256, "V8sV8f", "ncV:256:",
+                                                "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512, "V16sV16f", "ncV:512:",
+                                                "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:",
+                                            "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:",
+                                            "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:",
+                                            "avx512bf16")
 
 // generic select intrinsics
 TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
index 6d4aa1bf42c9ef262ff75e321cfab3be1e95f35f..a293a39f28cc2f3df2def2469a645b8b2b332975 100644 (file)
@@ -2854,6 +2854,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
 def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
 def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
 def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
+def mavx512bf16 : Flag<["-"], "mavx512bf16">, Group<m_x86_Features_Group>;
+def mno_avx512bf16 : Flag<["-"], "mno-avx512bf16">, Group<m_x86_Features_Group>;
 def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
 def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
 def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
index 2345d7ffcce0c76065bc6e0f4808e61e6921b71c..b83c9382fc4a0b5aaddfdc461f75f872408b7745 100644 (file)
@@ -521,6 +521,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
                 Features["avx512ifma"] = Features["avx512vpopcntdq"] =
                     Features["avx512bitalg"] = Features["avx512vnni"] =
                         Features["avx512vbmi2"] = false;
+                        Features["avx512bf16"] = false;
     break;
   }
 }
@@ -652,16 +653,22 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
              Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
              Name == "avx512vbmi" || Name == "avx512ifma" ||
              Name == "avx512vpopcntdq" || Name == "avx512bitalg" ||
+             Name == "avx512bf16" ||
              Name == "avx512vnni" || Name == "avx512vbmi2") {
     if (Enabled)
       setSSELevel(Features, AVX512F, Enabled);
     // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled.
     if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)
       Features["avx512bw"] = true;
+    if (Name == "avx512bf16" && Enabled)
+      Features["avx512bw"] = Features["avx512vl"] = true;
     // Also disable VBMI/VBMI2/BITALG if BWI is being disabled.
     if (Name == "avx512bw" && !Enabled)
       Features["avx512vbmi"] = Features["avx512vbmi2"] =
+      Features["avx512bf16"] =
       Features["avx512bitalg"] = false;
+    if (Name == "avx512vl" && !Enabled)
+      Features["avx512bf16"] = false;
   } else if (Name == "fma") {
     if (Enabled)
       setSSELevel(Features, AVX, Enabled);
@@ -751,6 +758,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAVX512VPOPCNTDQ = true;
     } else if (Feature == "+avx512vnni") {
       HasAVX512VNNI = true;
+    } else if (Feature == "+avx512bf16") {
+      HasAVX512BF16 = true;
     } else if (Feature == "+avx512er") {
       HasAVX512ER = true;
     } else if (Feature == "+avx512pf") {
@@ -1141,6 +1150,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AVX512VPOPCNTDQ__");
   if (HasAVX512VNNI)
     Builder.defineMacro("__AVX512VNNI__");
+  if (HasAVX512BF16)
+    Builder.defineMacro("__AVX512BF16__");
   if (HasAVX512ER)
     Builder.defineMacro("__AVX512ER__");
   if (HasAVX512PF)
@@ -1305,6 +1316,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("avx512cd", true)
       .Case("avx512vpopcntdq", true)
       .Case("avx512vnni", true)
+      .Case("avx512bf16", true)
       .Case("avx512er", true)
       .Case("avx512pf", true)
       .Case("avx512dq", true)
@@ -1383,6 +1395,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx512cd", HasAVX512CD)
       .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
       .Case("avx512vnni", HasAVX512VNNI)
+      .Case("avx512bf16", HasAVX512BF16)
       .Case("avx512er", HasAVX512ER)
       .Case("avx512pf", HasAVX512PF)
       .Case("avx512dq", HasAVX512DQ)
index 9f285d8d50d00d10a564719d637aaeb150ca0167..d7a87f8d7c829cbf47ff58e20a75b9dbd6376088 100644 (file)
@@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAVX512CD = false;
   bool HasAVX512VPOPCNTDQ = false;
   bool HasAVX512VNNI = false;
+  bool HasAVX512BF16 = false;
   bool HasAVX512ER = false;
   bool HasAVX512PF = false;
   bool HasAVX512DQ = false;
index 5abb62c560b270c238c05109fef16b56cd9990ca..14b8d0b18d3f09fb25c9f561289bcec8b231abff 100644 (file)
@@ -11851,6 +11851,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_cmpordsd:
     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
 
+// AVX512 bf16 intrinsics
+  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+    Ops[2] = getMaskVecValue(*this, Ops[2],
+                             Ops[0]->getType()->getVectorNumElements());
+    Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
+    return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+  }
+
   case X86::BI__emul:
   case X86::BI__emulu: {
     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
index a921d99399f317cd1b7a29de348c49bb0b0604ec..bfcade4584e6e156d296860547b4ea576e7d788c 100644 (file)
@@ -6,6 +6,7 @@ set(files
   armintr.h
   arm64intr.h
   avx2intrin.h
+  avx512bf16intrin.h
   avx512bwintrin.h
   avx512bitalgintrin.h
   avx512vlbitalgintrin.h
@@ -21,6 +22,7 @@ set(files
   avx512vbmivlintrin.h
   avx512vbmi2intrin.h
   avx512vlvbmi2intrin.h
+  avx512vlbf16intrin.h
   avx512vlbwintrin.h
   avx512vlcdintrin.h
   avx512vldqintrin.h
index 137b9bc7fb3ce1419d67989fd659e65338549c51..ffe638adf3da4d8952d556f1e76d351c84a17c52 100644 (file)
 #define bit_PCONFIG       0x00040000
 #define bit_IBT           0x00100000
 
+/* Features in %eax for leaf 7 sub-leaf 1 */
+#define bit_AVX512BF16    0x00000020
+
 /* Features in %eax for leaf 13 sub-leaf 1 */
 #define bit_XSAVEOPT    0x00000001
 #define bit_XSAVEC      0x00000002
index 1fda86ed26df57d4850dfe97936249c8cd9053ad..2bd79bd88f3826dfc3e7ba2b197ec4d3fdc393bc 100644 (file)
 #include <avx512pfintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
+#include <avx512bf16intrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+    (defined(__AVX512VL__) && defined(__AVX512BF16__))
+#include <avx512vlbf16intrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
 #include <pkuintrin.h>
 #endif
index 56ccaf98ea4ac57cd9e74fa07aa264b3b1d70d06..e3a2cb2e1603bd430cce503e2e3236289ea82a81 100644 (file)
@@ -50,9 +50,9 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
 // CHECK: use_before_def{{.*}} #7
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
 // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
 // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
 // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
 // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
index ee2e6afd615d82de178aa6aa2b677eba20a90dde..d925f6824c9e213618d129978220be095233f62d 100644 (file)
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-invpcid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-INVPCID %s
 // INVPCID: "-target-feature" "+invpcid"
 // NO-INVPCID: "-target-feature" "-invpcid"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX512BF16 %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512BF16 %s
+// AVX512BF16: "-target-feature" "+avx512bf16"
+// NO-AVX512BF16: "-target-feature" "-avx512bf16"
index 3e4ffe44dd30616e64e6491cf5315c9e8a8ebce1..54f56a826b3c07dfb885117ed97e224f3e1ca365 100644 (file)
 // RUN: %clang -target i386-unknown-unknown -march=atom -mrdpid -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=RDPID %s
 
 // RDPID: #define __RDPID__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16 %s
+
+// AVX512BF16: #define __AVX512BF16__ 1
+// AVX512BF16: #define __AVX512BW__ 1
+// AVX512BF16: #define __AVX512VL__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512BW %s
+
+// AVX512BF16_NOAVX512BW-NOT: #define __AVX512BF16__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512VL %s
+
+// AVX512BF16_NOAVX512VL-NOT: #define __AVX512BF16__ 1
+