From 0f7a7573151e21df4b3390eb612ae3a5460cc829 Mon Sep 17 00:00:00 2001
From: Geoff Berry <gberry@codeaurora.org>
Date: Mon, 28 Aug 2017 20:48:43 +0000
Subject: [PATCH] [AArch64][Falkor] Avoid generating STRQro* instructions

Summary:
STRQro* instructions are slower than the alternative ADD/STRQui expanded
instructions on Falkor, so avoid generating them unless we're optimizing
for code size.

Reviewers: t.p.northover, mcrosier

Subscribers: aemerson, rengolin, javed.absar, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D37020

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311931 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64.td             |  6 ++-
 lib/Target/AArch64/AArch64InstrFormats.td | 12 ++----
 lib/Target/AArch64/AArch64InstrInfo.td    | 15 +++++++-
 lib/Target/AArch64/AArch64Subtarget.h     |  2 +
 test/CodeGen/AArch64/strqro.ll            | 47 +++++++++++++++++++++++
 5 files changed, 72 insertions(+), 10 deletions(-)
 create mode 100644 test/CodeGen/AArch64/strqro.ll
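As a quick illustration of the summary above (a sketch, not part of the
committed diff; the @qstore name and the exact registers are hypothetical
and depend on register allocation):

    ; A register-offset store of a Q (128-bit FP) register:
    define void @qstore(fp128 %val, i64 %base, i64 %offset) {
      %addrint = add i64 %base, %offset
      %addr = inttoptr i64 %addrint to fp128*
      store volatile fp128 %val, fp128* %addr
      ret void
    }
    ;
    ; Default lowering, a single register-offset store (STRQroX):
    ;   str q0, [x0, x1]
    ;
    ; Preferred on Falkor (ADD plus unsigned-immediate-offset STRQui),
    ; unless we're optimizing for code size:
    ;   add x8, x0, x1
    ;   str q0, [x8]
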
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index c24229d26ee..8ff7f40ade4 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -94,6 +94,9 @@ def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
 def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", "Paired128IsSlow",
                            "true", "Paired 128 bit loads and stores are slow">;
 
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow",
+                        "true", "STR of Q register with register offset is slow">;
+
 def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
     "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
     "true", "Use alternative pattern for sextload convert to f32">;
@@ -339,7 +342,8 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
                                   FeaturePredictableSelectIsExpensive,
                                   FeatureRDM,
                                   FeatureZCZeroing,
-                                  FeatureLSLFast
+                                  FeatureLSLFast,
+                                  FeatureSlowSTRQro
                                   ]>;
 
 def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 2e8c56a2b50..bfc730ae10a 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -3072,22 +3072,18 @@ multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
 
 multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
                       string asm, ValueType Ty, SDPatternOperator storeop> {
-  let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+  let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
   def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
                            (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
-                           [(storeop (Ty regtype:$Rt),
-                                     (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
-                                                     ro_Wextend128:$extend))]>,
+                           []>,
            Sched<[WriteSTIdx, ReadAdrBase]> {
     let Inst{13} = 0b0;
   }
 
-  let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+  let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
   def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
                            (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
-                           [(storeop (Ty regtype:$Rt),
-                                     (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
-                                                     ro_Xextend128:$extend))]>,
+                           []>,
            Sched<[WriteSTIdx, ReadAdrBase]> {
     let Inst{13} = 0b1;
   }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index b31180dc008..009c04f48c4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -330,6 +330,8 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 let RecomputePerFunction = 1 in {
   def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">;
   def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">;
+  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
+  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction()->optForSize()">;
 }
 
 include "AArch64InstrFormats.td"
@@ -2139,6 +2141,17 @@
 defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>;
 defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>;
 defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>;
 
+let Predicates = [UseSTRQro], AddedComplexity = 10 in {
+  def : Pat<(store (f128 FPR128:$Rt),
+                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+                                   ro_Wextend128:$extend)),
+            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
+  def : Pat<(store (f128 FPR128:$Rt),
+                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+                                   ro_Xextend128:$extend)),
+            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
+}
+
 multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                  Instruction STRW, Instruction STRX> {
@@ -2186,7 +2199,7 @@
 defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
 defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
 // Match all store 128 bits width whose type is compatible with FPR128
-let Predicates = [IsLE] in {
+let Predicates = [IsLE, UseSTRQro] in {
   // We must use ST1 to store vectors in big-endian.
   defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
   defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 553faf56afa..2aeb9f204f3 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -102,6 +102,7 @@ protected:
   bool UsePostRAScheduler = false;
   bool Misaligned128StoreIsSlow = false;
   bool Paired128IsSlow = false;
+  bool STRQroIsSlow = false;
   bool UseAlternateSExtLoadCVTF32Pattern = false;
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
@@ -219,6 +220,7 @@
   bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
   bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
   bool isPaired128Slow() const { return Paired128IsSlow; }
+  bool isSTRQroSlow() const { return STRQroIsSlow; }
   bool useAlternateSExtLoadCVTF32Pattern() const {
     return UseAlternateSExtLoadCVTF32Pattern;
   }
diff --git a/test/CodeGen/AArch64/strqro.ll b/test/CodeGen/AArch64/strqro.ll
new file mode 100644
index 00000000000..218248d54f8
--- /dev/null
+++ b/test/CodeGen/AArch64/strqro.ll
@@ -0,0 +1,47 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-STRQRO %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=falkor | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTRQRO %s
+
+; CHECK-LABEL: strqrox:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+; CHECK-NOSTRQRO-NOT: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+define void @strqrox(fp128 %val64, i64 %base, i64 %offset) {
+  %addrint = add i64 %base, %offset
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+
+; Check that STRQro is generated for both cases if we're optimizing for code size.
+; CHECK-LABEL: strqrox_optsize:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+; CHECK-NOSTRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+define void @strqrox_optsize(fp128 %val64, i64 %base, i64 %offset) minsize {
+  %addrint = add i64 %base, %offset
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+
+; CHECK-LABEL: strqrow:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+; CHECK-NOSTRQRO-NOT: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+define void @strqrow(fp128 %val64, i64 %base, i32 %offset) {
+  %offset64 = zext i32 %offset to i64
+  %addrint = add i64 %base, %offset64
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+
+; Check that STRQro is generated for both cases if we're optimizing for code size.
+; CHECK-LABEL: strqrow_optsize:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+; CHECK-NOSTRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+define void @strqrow_optsize(fp128 %val64, i64 %base, i32 %offset) minsize {
+  %offset64 = zext i32 %offset to i64
+  %addrint = add i64 %base, %offset64
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+
-- 
2.50.1