From a7c6c642e3ce7d594bd6e571bf59cbc7507e9d5b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 8 May 2017 12:09:45 +0000 Subject: [PATCH] [X86][LWP] Add clang support for LWP instructions. This patch adds support for the LightWeight Profiling (LWP) instructions which are available on all AMD Bulldozer class CPUs (bdver1 to bdver4). Differential Revision: https://reviews.llvm.org/D32770 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@302418 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 6 + include/clang/Basic/BuiltinsX86_64.def | 2 + include/clang/Driver/Options.td | 2 + lib/Basic/Targets.cpp | 8 ++ lib/Headers/CMakeLists.txt | 1 + lib/Headers/lwpintrin.h | 150 +++++++++++++++++++++++++ lib/Headers/x86intrin.h | 4 + test/CodeGen/lwp-builtins.c | 39 +++++++ 8 files changed, 212 insertions(+) create mode 100644 lib/Headers/lwpintrin.h create mode 100644 test/CodeGen/lwp-builtins.c diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index c8a3c2f4d3..68b868ce8e 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -668,6 +668,12 @@ TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "", "bmi2") // TBM TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "", "tbm") +// LWP +TARGET_BUILTIN(__builtin_ia32_llwpcb, "vv*", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_slwpcb, "v*", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpins32, "UcUiUiUi", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiUi", "", "lwp") + // SHA TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "", "sha") TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "", "sha") diff --git a/include/clang/Basic/BuiltinsX86_64.def b/include/clang/Basic/BuiltinsX86_64.def index d38f522c38..2851184c2c 100644 --- a/include/clang/Basic/BuiltinsX86_64.def +++ b/include/clang/Basic/BuiltinsX86_64.def @@ -69,6 +69,8 @@ TARGET_BUILTIN(__builtin_ia32_bzhi_di, "ULLiULLiULLi", "", "bmi2") 
TARGET_BUILTIN(__builtin_ia32_pdep_di, "ULLiULLiULLi", "", "bmi2") TARGET_BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "", "bmi2") TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm") +TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp") TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl") diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index deec33b4af..31015228f3 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -1752,6 +1752,7 @@ def mno_bmi : Flag<["-"], "mno-bmi">, Group<m_x86_Features_Group>; def mno_bmi2 : Flag<["-"], "mno-bmi2">, Group<m_x86_Features_Group>; def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>; def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>; +def mno_lwp : Flag<["-"], "mno-lwp">, Group<m_x86_Features_Group>; def mno_fma4 : Flag<["-"], "mno-fma4">, Group<m_x86_Features_Group>; def mno_fma : Flag<["-"], "mno-fma">, Group<m_x86_Features_Group>; def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>; @@ -1951,6 +1952,7 @@ def mbmi : Flag<["-"], "mbmi">, Group<m_x86_Features_Group>; def mbmi2 : Flag<["-"], "mbmi2">, Group<m_x86_Features_Group>; def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>; def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>; +def mlwp : Flag<["-"], "mlwp">, Group<m_x86_Features_Group>; def mfma4 : Flag<["-"], "mfma4">, Group<m_x86_Features_Group>; def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>; def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>; diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 16aa82f138..33eb0b05dd 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -2591,6 +2591,7 @@ class X86TargetInfo : public TargetInfo { bool HasRDSEED = false; bool HasADX = false; bool HasTBM = false; + bool HasLWP = false; bool HasFMA = false; bool HasF16C = false; bool HasAVX512CD = false; @@ -3363,6 +3364,7 @@ bool X86TargetInfo::initFeatureMap( case CK_BDVER1: // xop implies avx, 
sse4a and fma4. setFeatureEnabledImpl(Features, "xop", true); + setFeatureEnabledImpl(Features, "lwp", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); @@ -3634,6 +3636,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasADX = true; } else if (Feature == "+tbm") { HasTBM = true; + } else if (Feature == "+lwp") { + HasLWP = true; } else if (Feature == "+fma") { HasFMA = true; } else if (Feature == "+f16c") { @@ -3949,6 +3953,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasTBM) Builder.defineMacro("__TBM__"); + if (HasLWP) + Builder.defineMacro("__LWP__"); + if (HasMWAITX) Builder.defineMacro("__MWAITX__"); @@ -4132,6 +4139,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) + .Case("lwp", HasLWP) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index 35aff4017e..6091db08a9 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -45,6 +45,7 @@ set(files inttypes.h iso646.h limits.h + lwpintrin.h lzcntintrin.h mm3dnow.h mmintrin.h diff --git a/lib/Headers/lwpintrin.h b/lib/Headers/lwpintrin.h new file mode 100644 index 0000000000..c95fdd9a20 --- /dev/null +++ b/lib/Headers/lwpintrin.h @@ -0,0 +1,150 @@ +/*===---- lwpintrin.h - LWP intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit 
persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __LWPINTRIN_H +#define __LWPINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp"))) + +/// \brief Parses the LWPCB at the specified address and enables +/// profiling if valid. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the LLWPCB instruction. +/// +/// \param __addr +/// Address to the new Lightweight Profiling Control Block (LWPCB). If the +/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables +/// Lightweight Profiling. +static __inline__ void __DEFAULT_FN_ATTRS +__llwpcb (void *__addr) +{ + __builtin_ia32_llwpcb(__addr); +} + +/// \brief Flushes the LWP state to memory and returns the address of the LWPCB. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the SLWPCB instruction. +/// +/// \return +/// Address to the current Lightweight Profiling Control Block (LWPCB). +/// If LWP is not currently enabled, returns NULL. 
+static __inline__ void* __DEFAULT_FN_ATTRS +__slwpcb (void) +{ + return __builtin_ia32_slwpcb(); +} + +/// \brief Inserts programmed event record into the LWP event ring buffer +/// and advances the ring buffer pointer. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the LWPINS instruction. +/// +/// \param DATA2 +/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. +/// \returns If the ring buffer is full and LWP is running in Synchronized Mode, +/// the event record overwrites the last record in the buffer, the MissedEvents +/// counter in the LWPCB is incremented, the head pointer is not advanced, and +/// 1 is returned. Otherwise 0 is returned. +#define __lwpins32(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +/// \brief Decrements the LWP programmed value sample event counter. If the result is +/// negative, inserts an event record into the LWP event ring buffer in memory +/// and advances the ring buffer pointer. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the LWPVAL instruction. +/// +/// \param DATA2 +/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. +#define __lwpval32(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +#ifdef __x86_64__ + +/// \brief Inserts programmed event record into the LWP event ring buffer +/// and advances the ring buffer pointer. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the LWPINS instruction. 
+/// +/// \param DATA2 +/// A 64-bit value is inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. +/// \returns If the ring buffer is full and LWP is running in Synchronized Mode, +/// the event record overwrites the last record in the buffer, the MissedEvents +/// counter in the LWPCB is incremented, the head pointer is not advanced, and +/// 1 is returned. Otherwise 0 is returned. +#define __lwpins64(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +/// \brief Decrements the LWP programmed value sample event counter. If the result is +/// negative, inserts an event record into the LWP event ring buffer in memory +/// and advances the ring buffer pointer. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the LWPVAL instruction. +/// +/// \param DATA2 +/// A 64-bit value is inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. 
+#define __lwpval64(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +#endif + +#undef __DEFAULT_FN_ATTRS + +#endif /* __LWPINTRIN_H */ diff --git a/lib/Headers/x86intrin.h b/lib/Headers/x86intrin.h index 2003029cb5..ef1d02948c 100644 --- a/lib/Headers/x86intrin.h +++ b/lib/Headers/x86intrin.h @@ -72,6 +72,10 @@ #include <tbmintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__) +#include <lwpintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) #include <f16cintrin.h> #endif diff --git a/test/CodeGen/lwp-builtins.c b/test/CodeGen/lwp-builtins.c new file mode 100644 index 0000000000..c689c3974d --- /dev/null +++ b/test/CodeGen/lwp-builtins.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lwp -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <x86intrin.h> + +void test_llwpcb(void *ptr) { + // CHECK-LABEL: @test_llwpcb + // CHECK: call void @llvm.x86.llwpcb(i8* %{{.*}}) + __llwpcb(ptr); +} + +void* test_slwpcb() { + // CHECK-LABEL: @test_slwpcb + // CHECK: call i8* @llvm.x86.slwpcb() + return __slwpcb(); +} + +unsigned char test_lwpins32(unsigned val2, unsigned val1) { + // CHECK-LABEL: @test_lwpins32 + // CHECK: call i8 @llvm.x86.lwpins32(i32 + return __lwpins32(val2, val1, 0x01234); +} + +unsigned char test_lwpins64(unsigned long long val2, unsigned val1) { + // CHECK-LABEL: @test_lwpins64 + // CHECK: call i8 @llvm.x86.lwpins64(i64 + return __lwpins64(val2, val1, 0x56789); +} + +void test_lwpval32(unsigned val2, unsigned val1) { + // CHECK-LABEL: @test_lwpval32 + // CHECK: call void @llvm.x86.lwpval32(i32 + __lwpval32(val2, val1, 0x01234); +} + +void test_lwpval64(unsigned long long val2, unsigned val1) { + // CHECK-LABEL: @test_lwpval64 + // CHECK: call void @llvm.x86.lwpval64(i64 + __lwpval64(val2, val1, 0xABCDEF); +} -- 2.40.0