From: Diogo N. Sampaio Date: Thu, 18 Jul 2019 10:05:56 +0000 (+0000) Subject: [ARM][DAGCOMBINE][FIX] PerformVMOVRRDCombine X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0bf4f3f8528e6b79cba86ab2ac41d616567f0c6d;p=llvm [ARM][DAGCOMBINE][FIX] PerformVMOVRRDCombine Summary: PerformVMOVRRDCombine omits adding an offset of 4 to the PointerInfo, when converting an f64 = load[M] to {i32, i32} = {load[M], load[M + 4]} Which would allow the machine scheduler to break dependencies with the second load. - pr42638 Reviewers: eli.friedman, dmgreen, ostannard Reviewed By: ostannard Subscribers: ostannard, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64870 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366423 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 09b78115f2e..18bb9bf3ecc 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -11748,9 +11748,11 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); - SDValue NewLD2 = DAG.getLoad( - MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(), - std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags()); + + SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr, + LD->getPointerInfo().getWithOffset(4), + std::min(4U, LD->getAlignment()), + LD->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); if (DCI.DAG.getDataLayout().isBigEndian()) diff --git a/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll b/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll new file mode 100644 index 00000000000..aac5de4ce5e --- /dev/null +++ b/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll @@ -0,0 +1,33 @@ +; RUN: llc -stop-after=machine-scheduler -debug-only dagcombine,selectiondag -o - 
%s 2>&1 | FileCheck %s +; REQUIRES: asserts +; pr42638 +target triple = "armv8r-arm-none-eabi" +%struct.__va_list = type { i8* } +define double @foo(i32 %P0, ...) #0 { +entry: + %V1 = alloca [8 x i8], align 8 + %vl = alloca %struct.__va_list, align 4 + %0 = getelementptr inbounds [8 x i8], [8 x i8]* %V1, i32 0, i32 0 + call void asm sideeffect "", "r"(i8* nonnull %0) + %1 = bitcast %struct.__va_list* %vl to i8* + call void @llvm.va_start(i8* nonnull %1) + %2 = bitcast %struct.__va_list* %vl to double** + %argp.cur3 = load double*, double** %2, align 4 + %v.sroa.0.0.copyload = load double, double* %argp.cur3, align 4 + ret double %v.sroa.0.0.copyload +} + +declare void @llvm.va_start(i8*) + +attributes #0 = { "target-cpu"="cortex-r52" "target-features"="-fp64" } + +; Ensures that the machine scheduler does not move accessing the upper +; 32 bits of the double to before actually storing it to memory + +; CHECK: Creating new node: {{.*}} = add FrameIndex:i32<2>, Constant:i32<4> +; CHECK-NEXT: Creating new node: {{.*}} i32,ch = load<(load 4 from [[MEM:%.*]] + 4)> +; CHECK: INLINEASM +; CHECK: (load 4 from [[MEM]] + 4) +; CHECK-NOT: (store 4 into [[MEM]] + 4) + +