From ae66674cad31c8f152c5f308d917d1cf246beddb Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 28 May 2019 12:58:07 +0000 Subject: [PATCH] [x86] fix 256-bit vector store splitting to honor 'volatile' Forking this out of the discussion in D62498 (and assuming that will be committed later, so adding the helper function here). The LangRef says: "the backend should never split or merge target-legal volatile load/store instructions." Differential Revision: https://reviews.llvm.org/D62506 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361815 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 44 ++++++++++++++++++++---------- test/CodeGen/X86/avx-load-store.ll | 6 ++-- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0bc31d5d516..7b4ce08b578 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21022,6 +21022,35 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } +/// Change a 256-bit vector store into a pair of 128-bit vector stores. +static SDValue split256BitStore(StoreSDNode *Store, SelectionDAG &DAG) { + SDValue StoredVal = Store->getValue(); + assert(StoredVal.getValueType().is256BitVector() && "Expecting 256-bit op"); + + // Splitting volatile memory ops is not allowed unless the operation was not + // legal to begin with. We are assuming the input op is legal (this transform + // is only used for targets with AVX). 
+ if (Store->isVolatile()) + return SDValue(); + + MVT StoreVT = StoredVal.getSimpleValueType(); + unsigned NumElems = StoreVT.getVectorNumElements(); + SDLoc DL(Store); + SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, DL); + SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, DL); + SDValue Ptr0 = Store->getBasePtr(); + SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, DL); + unsigned Alignment = Store->getAlignment(); + SDValue Ch0 = + DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(), + Alignment, Store->getMemOperand()->getFlags()); + SDValue Ch1 = + DAG.getStore(Store->getChain(), DL, Value1, Ptr1, + Store->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16), Store->getMemOperand()->getFlags()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1); +} + static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { StoreSDNode *St = cast<StoreSDNode>(Op.getNode()); @@ -39345,20 +39374,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, if (NumElems < 2) return SDValue(); - SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl); - SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl); - - SDValue Ptr0 = St->getBasePtr(); - SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl); - - SDValue Ch0 = - DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), - Alignment, St->getMemOperand()->getFlags()); - SDValue Ch1 = - DAG.getStore(St->getChain(), dl, Value1, Ptr1, - St->getPointerInfo().getWithOffset(16), - MinAlign(Alignment, 16U), St->getMemOperand()->getFlags()); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); + return split256BitStore(St, DAG); } // Optimize trunc store (of multiple scalars) to shuffle and store. 
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index 1fd4e07961d..7bd39f4d1d3 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -187,8 +187,10 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind { ; CHECK-LABEL: double_save_volatile: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovaps %xmm1, 16(%rdi) -; CHECK-NEXT: vmovaps %xmm0, (%rdi) +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: vmovups %ymm0, (%rdi) +; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq ; ; CHECK_O0-LABEL: double_save_volatile: -- 2.40.0