LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
if (!Ld) break;
- // Loads must only have one use.
- if (!Ld->hasNUsesOfValue(1, 0))
- break;
-
// The memory operands must not be volatile.
if (Ld->isVolatile() || Ld->isIndexed())
break;
if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
break;
- // The stored memory type must be the same.
- if (Ld->getMemoryVT() != MemVT)
- break;
-
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
// If this is not the first ptr that we check.
if (LdBasePtr.Base.getNode()) {
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
+ if (SDValue(Ld, 0).hasOneUse()) {
+ // Only the original store uses the loaded value, so just replace the chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ } else {
+ // Multiple uses exist. Keep the old load in line with the new
+ // load, i.e. replace users of Ld's chain with a TokenFactor of
+ // Ld's chain and the new load's chain. Create a temporary node to
+ // serve as a placeholder so that we do not replace the reference
+ // to the original load's chain inside the TokenFactor itself.
+ SDValue TokenDummy = DAG.getNode(ISD::DummyNode, SDLoc(Ld), MVT::Other);
+
+ // Replace all references to Ld's output chain with TokenDummy.
+ CombineTo(Ld, SDValue(Ld, 0), TokenDummy, false);
+ SDValue Token =
+ DAG.getNode(ISD::TokenFactor, SDLoc(Ld), MVT::Other, SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ // Replace all uses of TokenDummy with the TokenFactor built above.
+ CombineTo(TokenDummy.getNode(), Token);
+ assert(TokenDummy.use_empty() && "TokenDummy should be unused");
+ AddToWorklist(Ld);
+ }
}
// Replace the all stores with the new store.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -o - | FileCheck %s
-
+; PR32086
target triple = "x86_64-unknown-linux-gnu"
define void @merge_double(double* noalias nocapture %st, double* noalias nocapture readonly %ld) #0 {
; CHECK-LABEL: merge_double:
; CHECK: # BB#0:
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: movsd %xmm0, (%rdi)
-; CHECK-NEXT: movsd %xmm1, 8(%rdi)
-; CHECK-NEXT: movsd %xmm0, 16(%rdi)
-; CHECK-NEXT: movsd %xmm1, 24(%rdi)
+; CHECK-NEXT: movups (%rsi), %xmm0
+; CHECK-NEXT: movups %xmm0, (%rdi)
+; CHECK-NEXT: movups %xmm0, 16(%rdi)
; CHECK-NEXT: retq
%ld_idx1 = getelementptr inbounds double, double* %ld, i64 1
%ld0 = load double, double* %ld, align 8, !tbaa !2
define void @merge_loadstore_int(i64* noalias nocapture readonly %p, i64* noalias nocapture %q) local_unnamed_addr #0 {
; CHECK-LABEL: merge_loadstore_int:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq 8(%rdi), %rcx
-; CHECK-NEXT: movq %rax, (%rsi)
-; CHECK-NEXT: movq %rcx, 8(%rsi)
-; CHECK-NEXT: movq %rax, 16(%rsi)
-; CHECK-NEXT: movq %rcx, 24(%rsi)
+; CHECK-NEXT: movups (%rdi), %xmm0
+; CHECK-NEXT: movups %xmm0, (%rsi)
+; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: retq
entry:
%0 = load i64, i64* %p, align 8, !tbaa !1
; CHECK-LABEL: merge_loadstore_int_with_extra_use:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq 8(%rdi), %rcx
-; CHECK-NEXT: movq %rax, (%rsi)
-; CHECK-NEXT: movq %rcx, 8(%rsi)
-; CHECK-NEXT: movq %rax, 16(%rsi)
-; CHECK-NEXT: movq %rcx, 24(%rsi)
+; CHECK-NEXT: movups (%rdi), %xmm0
+; CHECK-NEXT: movups %xmm0, (%rsi)
+; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: retq
entry:
%0 = load i64, i64* %p, align 8, !tbaa !1