From: Kyrylo Tkachov Date: Tue, 16 Jul 2019 09:27:39 +0000 (+0000) Subject: [AArch64] Implement __jcvt intrinsic from Armv8.3-A X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7bb8f7b06a98fc389c17267b9dc028b11bffbf92;p=clang [AArch64] Implement __jcvt intrinsic from Armv8.3-A The jcvt intrinsic defined in ACLE [1] is available when ARM_FEATURE_JCVT is defined. This change introduces the AArch64 intrinsic, wires it up to the instruction and a new clang builtin function. The __ARM_FEATURE_JCVT macro is now defined when an Armv8.3-A or higher target is used. I've implemented the target detection logic in Clang so that this feature is enabled for architectures from armv8.3-a onwards (so -march=armv8.4-a also enables this, for example). make check-all didn't show any new failures. [1] https://developer.arm.com/docs/101028/latest/data-processing-intrinsics Differential Revision: https://reviews.llvm.org/D64495 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@366197 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsAArch64.def b/include/clang/Basic/BuiltinsAArch64.def index 5ba03da4a7..7701ad98f4 100644 --- a/include/clang/Basic/BuiltinsAArch64.def +++ b/include/clang/Basic/BuiltinsAArch64.def @@ -65,6 +65,8 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc") BUILTIN(__builtin_arm_dsb, "vUi", "nc") BUILTIN(__builtin_arm_isb, "vUi", "nc") +BUILTIN(__builtin_arm_jcvt, "Zid", "nc") + // Prefetch BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc") diff --git a/lib/Basic/Targets/AArch64.cpp b/lib/Basic/Targets/AArch64.cpp index 6011ae17b7..a02530ad06 100644 --- a/lib/Basic/Targets/AArch64.cpp +++ b/lib/Basic/Targets/AArch64.cpp @@ -118,6 +118,28 @@ void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, getTargetDefinesARMV81A(Opts, Builder); } +void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const { + Builder.defineMacro("__ARM_FEATURE_JCVT", "1"); + // Also include the Armv8.2 defines + getTargetDefinesARMV82A(Opts, Builder); +} + +void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the Armv8.3 defines + // FIXME: Armv8.4 makes some extensions mandatory. Handle them here. + getTargetDefinesARMV83A(Opts, Builder); +} + +void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the Armv8.4 defines + // FIXME: Armv8.5 makes some extensions mandatory. Handle them here. + getTargetDefinesARMV84A(Opts, Builder); +} + + void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. @@ -209,6 +231,15 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::AArch64::ArchKind::ARMV8_2A: getTargetDefinesARMV82A(Opts, Builder); break; + case llvm::AArch64::ArchKind::ARMV8_3A: + getTargetDefinesARMV83A(Opts, Builder); + break; + case llvm::AArch64::ArchKind::ARMV8_4A: + getTargetDefinesARMV84A(Opts, Builder); + break; + case llvm::AArch64::ArchKind::ARMV8_5A: + getTargetDefinesARMV85A(Opts, Builder); + break; } // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work. @@ -256,6 +287,12 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, ArchKind = llvm::AArch64::ArchKind::ARMV8_1A; if (Feature == "+v8.2a") ArchKind = llvm::AArch64::ArchKind::ARMV8_2A; + if (Feature == "+v8.3a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_3A; + if (Feature == "+v8.4a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_4A; + if (Feature == "+v8.5a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_5A; if (Feature == "+fullfp16") HasFullFP16 = 1; if (Feature == "+dotprod") diff --git a/lib/Basic/Targets/AArch64.h b/lib/Basic/Targets/AArch64.h index 0241b585c4..de0aed78e0 100644 --- a/lib/Basic/Targets/AArch64.h +++ b/lib/Basic/Targets/AArch64.h @@ -59,6 +59,12 @@ public: MacroBuilder &Builder) const; void getTargetDefinesARMV82A(const LangOptions &Opts, MacroBuilder &Builder) const; + void getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const; + void getTargetDefinesARMV84A(const LangOptions &Opts, + MacroBuilder &Builder) const; + void getTargetDefinesARMV85A(const LangOptions &Opts, + MacroBuilder &Builder) const; void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index c58d1018fa..acaa81ae8a 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -6977,6 +6977,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { + assert((getContext().getTypeSize(E->getType()) == 32) && + "__jcvt of unusual size!"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); + } + if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); diff --git a/lib/Headers/arm_acle.h b/lib/Headers/arm_acle.h index 08d65fa0d0..096cc261af 100644 --- a/lib/Headers/arm_acle.h +++ b/lib/Headers/arm_acle.h @@ -597,6 +597,14 @@ __crc32cd(uint32_t __a, uint64_t __b) { } #endif +/* Armv8.3-A Javascript conversion intrinsic */ +#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +__jcvt(double __a) { + return __builtin_arm_jcvt(__a); +} +#endif + /* 10.1 Special register intrinsics */ #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) diff --git a/test/CodeGen/arm_acle.c b/test/CodeGen/arm_acle.c index e8a744372d..beca937350 100644 --- a/test/CodeGen/arm_acle.c +++ b/test/CodeGen/arm_acle.c @@ -2,6 +2,9 @@ // RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch32 -check-prefix=ARM-NEWPM -check-prefix=AArch32-NEWPM // RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O2 -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-LEGACY -check-prefix=AArch64-LEGACY // RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-NEWPM -check-prefix=AArch64-NEWPM +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.3a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3 +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.4a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3 +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.5a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3 #include @@ -823,3 +826,11 @@ void test_wsrp(void *v) { // AArch64: ![[M0]] = !{!"1:2:3:4:5"} // AArch64: ![[M1]] = !{!"sysreg"} + +// AArch64-v8_3-LABEL: @test_jcvt( +// AArch64-v8_3: call i32 @llvm.aarch64.fjcvtzs +#ifdef __ARM_64BIT_STATE +int32_t test_jcvt(double v) { + return __jcvt(v); +} +#endif diff --git a/test/CodeGen/builtins-arm64.c b/test/CodeGen/builtins-arm64.c index f164c2f6f3..5ec63fba82 100644 --- a/test/CodeGen/builtins-arm64.c +++ b/test/CodeGen/builtins-arm64.c @@ -58,6 +58,12 @@ void prefetch() { // CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 3, i32 0) } +int32_t jcvt(double v) { + //CHECK-LABEL: @jcvt( + //CHECK: call i32 @llvm.aarch64.fjcvtzs + return __builtin_arm_jcvt(v); +} + __typeof__(__builtin_arm_rsr("1:2:3:4:5")) rsr(void); uint32_t rsr() {