// instructions (stp).
SDLoc DL(&St);
SDValue BasePtr = St.getBasePtr();
+ const MachinePointerInfo &PtrInfo = St.getPointerInfo();
SDValue NewST1 =
- DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, St.getPointerInfo(),
+ DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
OrigAlignment, St.getMemOperand()->getFlags());
unsigned Offset = EltOffset;
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(Offset, DL, MVT::i64));
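+    // Carry this element's byte offset in the pointer info so that alias
+    // analysis sees the true address range of each split store.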
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
- St.getPointerInfo(), Alignment,
+ PtrInfo.getWithOffset(Offset), Alignment,
St.getMemOperand()->getFlags());
Offset += EltOffset;
}
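Why the offset matters (an illustration, not part of the patch): MI-level alias analysis decides whether two fixed-size accesses to the same base may overlap from the (offset, size) pairs recorded in their memoperands. A minimal standalone C++ sketch of that disjoint-range reasoning, using hypothetical names rather than LLVM's real types, shows how an offset of 0 on every split store made overlapping accesses look independent:

  #include <cassert>
  #include <cstdint>

  // Hypothetical stand-in for the (offset, size) pair that a MachineMemOperand
  // records via MachinePointerInfo; not LLVM's actual types.
  struct MemAccess {
    int64_t Offset;  // byte offset from the common base pointer
    uint64_t Size;   // access size in bytes
  };

  // Two same-base accesses may overlap iff their byte ranges intersect.
  static bool mayOverlap(const MemAccess &A, const MemAccess &B) {
    return A.Offset < B.Offset + int64_t(B.Size) &&
           B.Offset < A.Offset + int64_t(A.Size);
  }

  int main() {
    MemAccess TypeStore{8, 8};     // later store of %type at base+8
    MemAccess SplitCorrect{8, 8};  // split xzr store with the fixed info
    MemAccess SplitWrong{0, 8};    // pre-fix info: offset always 0
    assert(mayOverlap(SplitCorrect, TypeStore));  // overlap seen: stays ordered
    assert(!mayOverlap(SplitWrong, TypeStore));   // looked independent: the bug
    return 0;
  }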
--- /dev/null
+; RUN: llc -mtriple=aarch64 -mcpu=cortex-a53 < %s | FileCheck %s
+
+; Tests to check that zero stores, which are generated as STP xzr, xzr, are not
+; scheduled incorrectly due to wrong alias information on the split stores.
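+;
+; Before the fix, every store split from the memset carried the pointer info of
+; the base address (offset 0), so alias analysis saw no overlap with the later
+; field stores and the scheduler was free to move the zeroing stores past them.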
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+%struct.tree_common = type { i8*, i8*, i32 }
+
+; Original test case which exhibited the bug
+define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) {
+; CHECK-LABEL: test1:
+; CHECK: stp xzr, xzr, [x0, #8]
+; CHECK: stp xzr, x2, [x0]
+; CHECK: str w1, [x0, #16]
+entry:
+ %0 = bitcast %struct.tree_common* %t to i8*
+ tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
+ %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
+ store i32 %code, i32* %code1, align 8
+ %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
+ store i8* %type, i8** %type2, align 8
+ ret void
+}
+
+; Store to each struct element instead of using memset
+define void @test2(%struct.tree_common* %t, i32 %code, i8* %type) {
+; CHECK-LABEL: test2:
+; CHECK: stp xzr, xzr, [x0]
+; CHECK: str wzr, [x0, #16]
+; CHECK: str w1, [x0, #16]
+; CHECK: str x2, [x0, #8]
+entry:
+ %0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 0
+ %1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
+ %2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
+ store i8* zeroinitializer, i8** %0, align 8
+ store i8* zeroinitializer, i8** %1, align 8
+ store i32 zeroinitializer, i32* %2, align 8
+ store i32 %code, i32* %2, align 8
+ store i8* %type, i8** %1, align 8
+ ret void
+}
+
+; Vector store instead of memset
+define void @test3(%struct.tree_common* %t, i32 %code, i8* %type) {
+; CHECK-LABEL: test3:
+; CHECK: stp xzr, xzr, [x0, #8]
+; CHECK: stp xzr, x2, [x0]
+; CHECK: str w1, [x0, #16]
+entry:
+ %0 = bitcast %struct.tree_common* %t to <3 x i64>*
+ store <3 x i64> zeroinitializer, <3 x i64>* %0, align 8
+ %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
+ store i32 %code, i32* %code1, align 8
+ %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
+ store i8* %type, i8** %type2, align 8
+ ret void
+}
+
+; Vector store, then store to vector elements
+define void @test4(<3 x i64>* %p, i64 %x, i64 %y) {
+; CHECK-LABEL: test4:
+; CHECK: stp xzr, xzr, [x0, #8]
+; CHECK: stp xzr, x2, [x0]
+; CHECK: str x1, [x0, #16]
+entry:
+ store <3 x i64> zeroinitializer, <3 x i64>* %p, align 8
+ %0 = bitcast <3 x i64>* %p to i64*
+ %1 = getelementptr inbounds i64, i64* %0, i64 2
+ store i64 %x, i64* %1, align 8
+ %2 = getelementptr inbounds i64, i64* %0, i64 1
+ store i64 %y, i64* %2, align 8
+ ret void
+}
--- /dev/null
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+
+; Tests to check that the scheduler dependencies derived from alias analysis are
+; correct when we have stores that have been split up so that they can later be
+; merged into STP.
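+;
+; The key checks are the "ord" successor edges: each split zero store must
+; carry an ordering dependency on the later scalar store that writes the same
+; bytes, which only happens when its memoperand records the correct offset.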
+
+; CHECK: ********** MI Scheduling **********
+; CHECK: test_splat:BB#0 entry
+; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%3+8]
+; CHECK: Successors:
+; CHECK-NEXT: ord [[SU1:SU\([0-9]+\)]]
+; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%3+4]
+; CHECK: Successors:
+; CHECK-NEXT: ord [[SU2:SU\([0-9]+\)]]
+; CHECK: [[SU1]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%2]
+; CHECK: [[SU2]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%1]
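+;
+; The split halves of the vector store at %3+8 and %3+4 overlap the scalar
+; stores to %2 and %1 respectively, so they must stay ordered before them.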
+define void @test_splat(i32 %x, i32 %y, i32* %p) {
+entry:
+ %val = load i32, i32* %p, align 4
+ %0 = getelementptr inbounds i32, i32* %p, i64 1
+ %1 = getelementptr inbounds i32, i32* %p, i64 2
+ %2 = getelementptr inbounds i32, i32* %p, i64 3
+ %vec0 = insertelement <4 x i32> undef, i32 %val, i32 0
+ %vec1 = insertelement <4 x i32> %vec0, i32 %val, i32 1
+ %vec2 = insertelement <4 x i32> %vec1, i32 %val, i32 2
+ %vec3 = insertelement <4 x i32> %vec2, i32 %val, i32 3
+ %3 = bitcast i32* %0 to <4 x i32>*
+ store <4 x i32> %vec3, <4 x i32>* %3, align 4
+ store i32 %x, i32* %2, align 4
+ store i32 %y, i32* %1, align 4
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+%struct.tree_common = type { i8*, i8*, i32 }
+
+; CHECK: ********** MI Scheduling **********
+; CHECK: test_zero:BB#0 entry
+; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 2; mem:ST8[%0+16]
+; CHECK: Successors:
+; CHECK-NEXT: ord [[SU3:SU\([0-9]+\)]]
+; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 1; mem:ST8[%0+8]
+; CHECK: Successors:
+; CHECK-NEXT: ord [[SU4:SU\([0-9]+\)]]
+; CHECK: [[SU3]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 4; mem:ST4[%code1]
+; CHECK: [[SU4]]: STRXui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 1; mem:ST8[%type2]
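+;
+; Here the memset is lowered to xzr stores; the ones at %0+16 and %0+8 overlap
+; the later stores to %code1 and %type2 and must stay ordered before them.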
+define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
+entry:
+ %0 = bitcast %struct.tree_common* %t to i8*
+ tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
+ %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
+ store i32 %code, i32* %code1, align 8
+ %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
+ store i8* %type, i8** %type2, align 8
+ ret void
+}