def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
"Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow",
+ "true", "STR of Q register with register offset is slow">;
+
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;
FeaturePredictableSelectIsExpensive,
FeatureRDM,
FeatureZCZeroing,
- FeatureLSLFast
+ FeatureLSLFast,
+ FeatureSlowSTRQro
]>;
def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
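+  // Note: the selection patterns are intentionally omitted here; the f128
+  // store patterns are re-added below under the UseSTRQro predicate, so
+  // subtargets where STRQro is slow can avoid these instructions.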
- let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
- [(storeop (Ty regtype:$Rt),
- (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend128:$extend))]>,
+ []>,
Sched<[WriteSTIdx, ReadAdrBase]> {
let Inst{13} = 0b0;
}
- let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
- [(storeop (Ty regtype:$Rt),
- (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend128:$extend))]>,
+ []>,
Sched<[WriteSTIdx, ReadAdrBase]> {
let Inst{13} = 0b1;
}
let RecomputePerFunction = 1 in {
def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">;
def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">;
+ // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
+ def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction()->optForSize()">;
}
include "AArch64InstrFormats.td"
defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>;
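+// STRQro store patterns, moved out of Store128RO above so they can be
+// guarded by UseSTRQro; AddedComplexity = 10 is kept from the original
+// instruction definitions so these patterns still take priority when enabled.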
+let Predicates = [UseSTRQro], AddedComplexity = 10 in {
+ def : Pat<(store (f128 FPR128:$Rt),
+ (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend128:$extend)),
+ (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
+ def : Pat<(store (f128 FPR128:$Rt),
+ (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend128:$extend)),
+ (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
+}
+
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
Instruction STRW, Instruction STRX> {
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
// Match all store 128 bits width whose type is compatible with FPR128
-let Predicates = [IsLE] in {
+let Predicates = [IsLE, UseSTRQro] in {
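+// UseSTRQro keeps these register-offset Q-register stores away from
+// subtargets where STRQro is slow, unless we are optimizing for code size.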
// We must use ST1 to store vectors in big-endian.
defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
bool UsePostRAScheduler = false;
bool Misaligned128StoreIsSlow = false;
bool Paired128IsSlow = false;
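+  // Set by the "slow-strqro-store" subtarget feature.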
+ bool STRQroIsSlow = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
bool isPaired128Slow() const { return Paired128IsSlow; }
+ bool isSTRQroSlow() const { return STRQroIsSlow; }
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-STRQRO %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=falkor | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTRQRO %s
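+; Falkor enables the slow-strqro-store feature, so the CHECK-NOSTRQRO prefix
+; checks that the register-offset form of STR Qt is not emitted unless we
+; are optimizing for code size.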
+
+; CHECK-LABEL: strqrox:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+; CHECK-NOSTRQRO-NOT: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+define void @strqrox(fp128 %val64, i64 %base, i64 %offset) {
+ %addrint = add i64 %base, %offset
+ %addr = inttoptr i64 %addrint to fp128*
+ store volatile fp128 %val64, fp128* %addr
+ ret void
+}
+
+; Check that STRQro is generated both with and without the slow-strqro-store
+; feature when optimizing for code size.
+; CHECK-LABEL: strqrox_optsize:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+; CHECK-NOSTRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+define void @strqrox_optsize(fp128 %val64, i64 %base, i64 %offset) minsize {
+ %addrint = add i64 %base, %offset
+ %addr = inttoptr i64 %addrint to fp128*
+ store volatile fp128 %val64, fp128* %addr
+ ret void
+}
+
+; CHECK-LABEL: strqrow:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+; CHECK-NOSTRQRO-NOT: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+define void @strqrow(fp128 %val64, i64 %base, i32 %offset) {
+ %offset64 = zext i32 %offset to i64
+ %addrint = add i64 %base, %offset64
+ %addr = inttoptr i64 %addrint to fp128*
+ store volatile fp128 %val64, fp128* %addr
+ ret void
+}
+
+; Check that STRQro is generated both with and without the slow-strqro-store
+; feature when optimizing for code size.
+; CHECK-LABEL: strqrow_optsize:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+; CHECK-NOSTRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+define void @strqrow_optsize(fp128 %val64, i64 %base, i32 %offset) minsize {
+ %offset64 = zext i32 %offset to i64
+ %addrint = add i64 %base, %offset64
+ %addr = inttoptr i64 %addrint to fp128*
+ store volatile fp128 %val64, fp128* %addr
+ ret void
+}
+