From: Nemanja Ivanovic Date: Tue, 4 Jun 2019 18:48:43 +0000 (+0000) Subject: Revert r362472 as it is breaking PPC build bots X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a3957457504981bd3e1ee5bee82fae03af5978c5;p=llvm Revert r362472 as it is breaking PPC build bots The patch https://reviews.llvm.org/rL362472 broke PPC LNT buildbots. Reverting it to bring the bots back to green. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362539 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b69936d462c..be1c1080165 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -524,7 +524,6 @@ namespace { const SDLoc &DL); SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); - SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue splitMergedValStore(StoreSDNode *ST); @@ -6276,180 +6275,6 @@ static Optional isBigEndian(const SmallVector &ByteOffsets, return BigEndian; } -static SDValue stripTruncAndExt(SDValue Value) { - switch (Value.getOpcode()) { - case ISD::TRUNCATE: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ANY_EXTEND: - return stripTruncAndExt(Value.getOperand(0)); - } - return Value; -} - -/// Match a pattern where a wide type scalar value is stored by several narrow -/// stores. Fold it into a single store or a BSWAP and a store if the targets -/// supports it. -/// -/// Assuming little endian target: -/// i8 *p = ... -/// i32 val = ... -/// p[0] = (val >> 0) & 0xFF; -/// p[1] = (val >> 8) & 0xFF; -/// p[2] = (val >> 16) & 0xFF; -/// p[3] = (val >> 24) & 0xFF; -/// => -/// *((i32)p) = val; -/// -/// i8 *p = ... -/// i32 val = ... -/// p[0] = (val >> 24) & 0xFF; -/// p[1] = (val >> 16) & 0xFF; -/// p[2] = (val >> 8) & 0xFF; -/// p[3] = (val >> 0) & 0xFF; -/// => -/// *((i32)p) = BSWAP(val); -SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { - // Collect all the stores in the chain. - SDValue Chain; - SmallVector Stores; - for (StoreSDNode *Store = N; Store; Store = dyn_cast(Chain)) { - if (Store->getMemoryVT() != MVT::i8 || - Store->isVolatile() || Store->isIndexed()) - return SDValue(); - Stores.push_back(Store); - Chain = Store->getChain(); - } - // Handle the simple type only. - unsigned Width = Stores.size(); - EVT VT = EVT::getIntegerVT( - *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits()); - if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) - return SDValue(); - - // Check if all the bytes of the combined value we are looking at are stored - // to the same base address. Collect bytes offsets from Base address into - // ByteOffsets. - SDValue CombinedValue; - SmallVector ByteOffsets(Width); - int64_t FirstOffset = INT64_MAX; - StoreSDNode *FirstStore = nullptr; - Optional Base; - for (auto Store : Stores) { - // All the stores store different byte of the CombinedValue. A truncate is - // required to get that byte value. - SDValue Trunc = Store->getValue(); - if (Trunc.getOpcode() != ISD::TRUNCATE) - return SDValue(); - // A shift operation is required to get the right byte offset, except the - // first byte. - int64_t Offset = 0; - SDValue Value = Trunc.getOperand(0); - if (Value.getOpcode() == ISD::SRL || - Value.getOpcode() == ISD::SRA) { - ConstantSDNode *ShiftOffset = - dyn_cast(Value.getOperand(1)); - // Trying to match the following pattern. The shift offset must be - // a constant and a multiple of 8. It is the byte offset in "y". - // - // x = srl y, offset - // i8 z = trunc x - // store z, ... - if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8)) - return SDValue(); - - Offset = ShiftOffset->getSExtValue()/8; - Value = Value.getOperand(0); - } - - // Stores must share the same combined value with different offsets. - if (!CombinedValue) - CombinedValue = Value; - else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value)) - return SDValue(); - - // The trunc and all the extend operation should be stripped to get the - // real value we are stored. - else if (CombinedValue.getValueType() != VT) { - if (Value.getValueType() == VT || - Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits()) - CombinedValue = Value; - // Give up if the combined value type is smaller than the store size. - if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits()) - return SDValue(); - } - - // Stores must share the same base address - BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG); - int64_t ByteOffsetFromBase = 0; - if (!Base) - Base = Ptr; - else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) - return SDValue(); - - // Remember the first byte store - if (ByteOffsetFromBase < FirstOffset) { - FirstStore = Store; - FirstOffset = ByteOffsetFromBase; - } - // Map the offset in the store and the offset in the combined value. - if (Offset < 0 || Offset >= Width) - return SDValue(); - ByteOffsets[Offset] = ByteOffsetFromBase; - } - - assert(FirstOffset != INT64_MAX && "First byte offset must be set"); - assert(FirstStore && "First store must be set"); - - // Check if the bytes of the combined value we are looking at match with - // either big or little endian value store. - Optional IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); - if (!IsBigEndian.hasValue()) - return SDValue(); - - // The node we are looking at matches with the pattern, check if we can - // replace it with a single bswap if needed and store. - - // If the store needs byte swap check if the target supports it - bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian; - - // Before legalize we can introduce illegal bswaps which will be later - // converted to an explicit bswap sequence. This way we end up with a single - // store and byte shuffling instead of several stores and byte shuffling. - if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) - return SDValue(); - - // Check that a store of the wide type is both allowed and fast on the target - bool Fast = false; - bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - VT, FirstStore->getAddressSpace(), - FirstStore->getAlignment(), &Fast); - if (!Allowed || !Fast) - return SDValue(); - - if (VT != CombinedValue.getValueType()) { - assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() && - "Get unexpected store value to combine"); - CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, - CombinedValue); - } - - if (NeedsBswap) - CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue); - - SDValue NewStore = - DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(), - FirstStore->getPointerInfo(), FirstStore->getAlignment()); - - // Rely on other DAG combine rules to remove the other individual stores. - DAG.ReplaceAllUsesWith(N, NewStore.getNode()); - return NewStore; -} - /// Match a pattern where a wide type scalar value is loaded by several narrow /// loads and combined by shifts and ors. Fold it into a single load or a load /// and a BSWAP if the targets supports it. @@ -15968,10 +15793,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; - // Try transforming several stores into STORE (BSWAP). - if (SDValue Store = MatchStoreCombine(ST)) - return Store; - if (ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes, on this store and any // adjacent stores. diff --git a/test/CodeGen/PowerPC/store-combine.ll b/test/CodeGen/PowerPC/store-combine.ll index 9315df13080..77e02e76cde 100644 --- a/test/CodeGen/PowerPC/store-combine.ll +++ b/test/CodeGen/PowerPC/store-combine.ll @@ -10,12 +10,24 @@ define void @store_i32_by_i8(i32 signext %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: stw 3, 0(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 3, 0(4) +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: srwi 3, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, 2(4) +; CHECK-PPC64LE-NEXT: stb 3, 3(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: stwbrx 3, 0, 4 +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 3, 0(4) +; CHECK-PPC64-NEXT: stb 5, 1(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: srwi 3, 3, 24 +; CHECK-PPC64-NEXT: stb 5, 2(4) +; CHECK-PPC64-NEXT: stb 3, 3(4) ; CHECK-PPC64-NEXT: blr entry: %conv = trunc i32 %m to i8 @@ -43,12 +55,24 @@ entry: define void @store_i32_by_i8_bswap(i32 signext %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: stwbrx 3, 0, 4 +; CHECK-PPC64LE-NEXT: srwi 5, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, 0(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 5, 2(4) +; CHECK-PPC64LE-NEXT: stb 3, 3(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: stw 3, 0(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 24 +; CHECK-PPC64-NEXT: srwi 6, 3, 16 +; CHECK-PPC64-NEXT: stb 5, 0(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 6, 1(4) +; CHECK-PPC64-NEXT: stb 5, 2(4) +; CHECK-PPC64-NEXT: stb 3, 3(4) ; CHECK-PPC64-NEXT: blr entry: %0 = lshr i32 %m, 24 @@ -80,12 +104,40 @@ entry: define void @store_i64_by_i8(i64 %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i64_by_i8: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: stdx 3, 0, 4 +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8 +; CHECK-PPC64LE-NEXT: stb 3, 0(4) +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16 +; CHECK-PPC64LE-NEXT: stb 5, 2(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24 +; CHECK-PPC64LE-NEXT: stb 5, 3(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32 +; CHECK-PPC64LE-NEXT: stb 5, 4(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40 +; CHECK-PPC64LE-NEXT: stb 5, 5(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48 +; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56 +; CHECK-PPC64LE-NEXT: stb 5, 6(4) +; CHECK-PPC64LE-NEXT: stb 3, 7(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i64_by_i8: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: stdbrx 3, 0, 4 +; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8 +; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16 +; CHECK-PPC64-NEXT: stb 5, 1(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24 +; CHECK-PPC64-NEXT: stb 6, 2(4) +; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32 +; CHECK-PPC64-NEXT: stb 5, 3(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40 +; CHECK-PPC64-NEXT: stb 6, 4(4) +; CHECK-PPC64-NEXT: stb 3, 0(4) +; CHECK-PPC64-NEXT: stb 5, 5(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48 +; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56 +; CHECK-PPC64-NEXT: stb 5, 6(4) +; CHECK-PPC64-NEXT: stb 3, 7(4) ; CHECK-PPC64-NEXT: blr entry: %conv = trunc i64 %m to i8 @@ -133,12 +185,40 @@ entry: define void @store_i64_by_i8_bswap(i64 %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: stdbrx 3, 0, 4 +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8 +; CHECK-PPC64LE-NEXT: stb 3, 7(4) +; CHECK-PPC64LE-NEXT: stb 5, 6(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16 +; CHECK-PPC64LE-NEXT: stb 5, 5(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24 +; CHECK-PPC64LE-NEXT: stb 5, 4(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32 +; CHECK-PPC64LE-NEXT: stb 5, 3(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40 +; CHECK-PPC64LE-NEXT: stb 5, 2(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48 +; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56 +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: stb 3, 0(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: stdx 3, 0, 4 +; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8 +; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16 +; CHECK-PPC64-NEXT: stb 5, 6(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24 +; CHECK-PPC64-NEXT: stb 6, 5(4) +; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32 +; CHECK-PPC64-NEXT: stb 5, 4(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40 +; CHECK-PPC64-NEXT: stb 6, 3(4) +; CHECK-PPC64-NEXT: stb 3, 7(4) +; CHECK-PPC64-NEXT: stb 5, 2(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48 +; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56 +; CHECK-PPC64-NEXT: stb 5, 1(4) +; CHECK-PPC64-NEXT: stb 3, 0(4) ; CHECK-PPC64-NEXT: blr entry: %conv = trunc i64 %m to i8 @@ -187,18 +267,46 @@ entry: define void @store_i64_by_i8_bswap_uses(i32 signext %t, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap_uses: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: slwi [[REG:[0-9]+]], 3, 3 -; CHECK-PPC64LE-NEXT: subf [[REG1:[0-9]+]], 3, [[REG]] -; CHECK-PPC64LE-NEXT: extsw [[REG2:[0-9]+]], [[REG1]] -; CHECK-PPC64LE-NEXT: stdbrx [[REG2]], 0, 4 +; CHECK-PPC64LE-NEXT: slwi 5, 3, 3 +; CHECK-PPC64LE-NEXT: subf 3, 3, 5 +; CHECK-PPC64LE-NEXT: extsw 3, 3 +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8 +; CHECK-PPC64LE-NEXT: stb 3, 7(4) +; CHECK-PPC64LE-NEXT: stb 5, 6(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16 +; CHECK-PPC64LE-NEXT: stb 5, 5(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24 +; CHECK-PPC64LE-NEXT: stb 5, 4(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32 +; CHECK-PPC64LE-NEXT: stb 5, 3(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40 +; CHECK-PPC64LE-NEXT: stb 5, 2(4) +; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48 +; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56 +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: stb 3, 0(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap_uses: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: slwi [[REG:[0-9]+]], 3, 3 -; CHECK-PPC64-NEXT: subf [[REG1:[0-9]+]], 3, [[REG]] -; CHECK-PPC64-NEXT: extsw [[REG2:[0-9]+]], [[REG1]] -; CHECK-PPC64-NEXT: stdx [[REG2]], 0, 4 +; CHECK-PPC64-NEXT: slwi 5, 3, 3 +; CHECK-PPC64-NEXT: subf 3, 3, 5 +; CHECK-PPC64-NEXT: extsw 3, 3 +; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8 +; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16 +; CHECK-PPC64-NEXT: stb 5, 6(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24 +; CHECK-PPC64-NEXT: stb 6, 5(4) +; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32 +; CHECK-PPC64-NEXT: stb 5, 4(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40 +; CHECK-PPC64-NEXT: stb 6, 3(4) +; CHECK-PPC64-NEXT: stb 3, 7(4) +; CHECK-PPC64-NEXT: stb 5, 2(4) +; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48 +; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56 +; CHECK-PPC64-NEXT: stb 5, 1(4) +; CHECK-PPC64-NEXT: stb 3, 0(4) ; CHECK-PPC64-NEXT: blr entry: %mul = mul nsw i32 %t, 7 @@ -248,11 +356,25 @@ entry: define void @store_i32_by_i8_bswap_volatile(i32 signext %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_volatile: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NOT: stwbrx +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 3, 3(4) +; CHECK-PPC64LE-NEXT: stb 5, 2(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: srwi 3, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: stb 3, 0(4) +; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_volatile: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NOT: stw +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 3, 3(4) +; CHECK-PPC64-NEXT: stb 5, 2(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: srwi 3, 3, 24 +; CHECK-PPC64-NEXT: stb 5, 1(4) +; CHECK-PPC64-NEXT: stb 3, 0(4) +; CHECK-PPC64-NEXT: blr entry: %conv = trunc i32 %m to i8 %arrayidx = getelementptr inbounds i8, i8* %p, i64 3 @@ -281,11 +403,29 @@ entry: define void @store_i32_by_i8_bswap_store_in_between(i32 signext %m, i8* %p, i8* %q) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_store_in_between: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NOT: stwbrx +; CHECK-PPC64LE-NEXT: srwi 6, 3, 8 +; CHECK-PPC64LE-NEXT: stb 3, 3(4) +; CHECK-PPC64LE-NEXT: stb 6, 2(4) +; CHECK-PPC64LE-NEXT: li 6, 3 +; CHECK-PPC64LE-NEXT: stb 6, 0(5) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: srwi 3, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: stb 3, 0(4) +; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_store_in_between: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NOT: stw +; CHECK-PPC64-NEXT: li 6, 3 +; CHECK-PPC64-NEXT: srwi 7, 3, 8 +; CHECK-PPC64-NEXT: stb 7, 2(4) +; CHECK-PPC64-NEXT: stb 3, 3(4) +; CHECK-PPC64-NEXT: stb 6, 0(5) +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: srwi 3, 3, 24 +; CHECK-PPC64-NEXT: stb 5, 1(4) +; CHECK-PPC64-NEXT: stb 3, 0(4) +; CHECK-PPC64-NEXT: blr entry: %conv = trunc i32 %m to i8 %arrayidx = getelementptr inbounds i8, i8* %p, i64 3 @@ -308,11 +448,25 @@ entry: define void @store_i32_by_i8_bswap_unrelated_store(i32 signext %m, i8* %p, i8* %q) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_unrelated_store: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NOT: stwbrx +; CHECK-PPC64LE-NEXT: srwi 6, 3, 8 +; CHECK-PPC64LE-NEXT: stb 3, 3(4) +; CHECK-PPC64LE-NEXT: stb 6, 2(5) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: srwi 3, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, 1(4) +; CHECK-PPC64LE-NEXT: stb 3, 0(4) +; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_unrelated_store: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NOT: stw +; CHECK-PPC64-NEXT: srwi 6, 3, 8 +; CHECK-PPC64-NEXT: stb 3, 3(4) +; CHECK-PPC64-NEXT: stb 6, 2(5) +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: srwi 3, 3, 24 +; CHECK-PPC64-NEXT: stb 5, 1(4) +; CHECK-PPC64-NEXT: stb 3, 0(4) +; CHECK-PPC64-NEXT: blr entry: %conv = trunc i32 %m to i8 %arrayidx = getelementptr inbounds i8, i8* %p, i64 3 @@ -339,13 +493,24 @@ entry: define void @store_i32_by_i8_bswap_nonzero_offset(i32 signext %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_nonzero_offset: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: addi [[REG1:[0-9]+]], 4, 1 -; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG1]] +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 5, 3(4) +; CHECK-PPC64LE-NEXT: stb 3, 4(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: srwi 3, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, 2(4) +; CHECK-PPC64LE-NEXT: stb 3, 1(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_nonzero_offset: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: stw 3, 1(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 3, 4(4) +; CHECK-PPC64-NEXT: stb 5, 3(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: srwi 3, 3, 24 +; CHECK-PPC64-NEXT: stb 5, 2(4) +; CHECK-PPC64-NEXT: stb 3, 1(4) ; CHECK-PPC64-NEXT: blr entry: %0 = lshr i32 %m, 8 @@ -374,13 +539,24 @@ entry: define void @store_i32_by_i8_neg_offset(i32 signext %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_neg_offset: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: stw 3, -4(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 5, -3(4) +; CHECK-PPC64LE-NEXT: stb 3, -4(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: srwi 3, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, -2(4) +; CHECK-PPC64LE-NEXT: stb 3, -1(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_neg_offset: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: addi [[REG1:[0-9]+]], 4, -4 -; CHECK-PPC64-NEXT: stwbrx 3, 0, [[REG1]] +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 3, -4(4) +; CHECK-PPC64-NEXT: stb 5, -3(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: srwi 3, 3, 24 +; CHECK-PPC64-NEXT: stb 5, -2(4) +; CHECK-PPC64-NEXT: stb 3, -1(4) ; CHECK-PPC64-NEXT: blr entry: %0 = lshr i32 %m, 8 @@ -409,13 +585,24 @@ entry: define void @store_i32_by_i8_bswap_neg_offset(i32 signext %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_neg_offset: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: addi [[REG1:[0-9]+]], 4, -4 -; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG1]] +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: stb 5, -3(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, -4(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 5, -2(4) +; CHECK-PPC64LE-NEXT: stb 3, -1(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_neg_offset: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: stw 3, -4(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: srwi 6, 3, 24 +; CHECK-PPC64-NEXT: stb 5, -3(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 6, -4(4) +; CHECK-PPC64-NEXT: stb 5, -2(4) +; CHECK-PPC64-NEXT: stb 3, -1(4) ; CHECK-PPC64-NEXT: blr entry: %0 = lshr i32 %m, 16 @@ -444,17 +631,28 @@ entry: define void @store_i32_by_i8_bswap_base_index_offset(i32 %m, i32 %i, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_base_index_offset: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: extsw [[REG1:[0-9]+]], 4 -; CHECK-PPC64LE-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]] -; CHECK-PPC64LE-NEXT: addi [[REG3:[0-9]+]], [[REG2]], -4 -; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG3]] +; CHECK-PPC64LE-NEXT: extsw 4, 4 +; CHECK-PPC64LE-NEXT: srwi 6, 3, 16 +; CHECK-PPC64LE-NEXT: add 4, 5, 4 +; CHECK-PPC64LE-NEXT: srwi 5, 3, 24 +; CHECK-PPC64LE-NEXT: stb 6, -3(4) +; CHECK-PPC64LE-NEXT: stb 5, -4(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 5, -2(4) +; CHECK-PPC64LE-NEXT: stb 3, -1(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_base_index_offset: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: extsw [[REG1:[0-9]+]], 4 -; CHECK-PPC64-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]] -; CHECK-PPC64-NEXT: stw 3, -4([[REG2]]) +; CHECK-PPC64-NEXT: extsw 4, 4 +; CHECK-PPC64-NEXT: srwi 6, 3, 16 +; CHECK-PPC64-NEXT: add 4, 5, 4 +; CHECK-PPC64-NEXT: srwi 5, 3, 24 +; CHECK-PPC64-NEXT: stb 6, -3(4) +; CHECK-PPC64-NEXT: srwi 6, 3, 8 +; CHECK-PPC64-NEXT: stb 5, -4(4) +; CHECK-PPC64-NEXT: stb 6, -2(4) +; CHECK-PPC64-NEXT: stb 3, -1(4) ; CHECK-PPC64-NEXT: blr entry: %0 = lshr i32 %m, 16 @@ -496,17 +694,28 @@ entry: define void @store_i32_by_i8_bswap_complicated(i32 %m, i32 %i, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_complicated: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: extsw [[REG1:[0-9]+]], 4 -; CHECK-PPC64LE-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]] -; CHECK-PPC64LE-NEXT: addi [[REG3:[0-9]+]], [[REG2]], 3 -; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG3]] +; CHECK-PPC64LE-NEXT: extsw 4, 4 +; CHECK-PPC64LE-NEXT: add 4, 5, 4 +; CHECK-PPC64LE-NEXT: srwi 5, 3, 24 +; CHECK-PPC64LE-NEXT: stb 5, 3(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 16 +; CHECK-PPC64LE-NEXT: stb 5, 4(4) +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 5, 5(4) +; CHECK-PPC64LE-NEXT: stb 3, 6(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_complicated: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: extsw [[REG1:[0-9]+]], 4 -; CHECK-PPC64-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]] -; CHECK-PPC64-NEXT: stw 3, 3([[REG2]]) +; CHECK-PPC64-NEXT: extsw 4, 4 +; CHECK-PPC64-NEXT: srwi 6, 3, 24 +; CHECK-PPC64-NEXT: add 4, 5, 4 +; CHECK-PPC64-NEXT: srwi 5, 3, 16 +; CHECK-PPC64-NEXT: stb 6, 3(4) +; CHECK-PPC64-NEXT: stb 5, 4(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 5, 5(4) +; CHECK-PPC64-NEXT: stb 3, 6(4) ; CHECK-PPC64-NEXT: blr entry: %idx.ext = sext i32 %i to i64 @@ -536,12 +745,16 @@ entry: define void @store_i16_by_i8_bswap(i16 %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_i16_by_i8_bswap: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: sthbrx 3, 0, 4 +; CHECK-PPC64LE-NEXT: srwi 5, 3, 8 +; CHECK-PPC64LE-NEXT: stb 5, 0(4) +; CHECK-PPC64LE-NEXT: stb 3, 1(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_i16_by_i8_bswap: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: sth 3, 0(4) +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 5, 0(4) +; CHECK-PPC64-NEXT: stb 3, 1(4) ; CHECK-PPC64-NEXT: blr entry: %0 = lshr i16 %m, 8 @@ -558,12 +771,16 @@ entry: define void @store_16_by_i8(i16 %m, i8* %p) { ; CHECK-PPC64LE-LABEL: store_16_by_i8: ; CHECK-PPC64LE: # %bb.0: # %entry -; CHECK-PPC64LE-NEXT: sth 3, 0(4) +; CHECK-PPC64LE-NEXT: stb 3, 0(4) +; CHECK-PPC64LE-NEXT: srwi 3, 3, 8 +; CHECK-PPC64LE-NEXT: stb 3, 1(4) ; CHECK-PPC64LE-NEXT: blr ; ; CHECK-PPC64-LABEL: store_16_by_i8: ; CHECK-PPC64: # %bb.0: # %entry -; CHECK-PPC64-NEXT: sthbrx 3, 0, 4 +; CHECK-PPC64-NEXT: srwi 5, 3, 8 +; CHECK-PPC64-NEXT: stb 3, 0(4) +; CHECK-PPC64-NEXT: stb 5, 1(4) ; CHECK-PPC64-NEXT: blr entry: %conv1 = trunc i16 %m to i8