#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
- SDValue MatchLoadCombine(SDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
- if (SDValue Load = MatchLoadCombine(N))
- return Load;
-
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
};
} // namespace
-namespace {
-/// Represents the origin of an individual byte in a load combine pattern. The
-/// value of the byte is either unknown, a known zero, or comes from memory.
-struct ByteProvider {
- enum ProviderTy {
- Unknown,
- ZeroConstant,
- Memory
- };
-
- ProviderTy Kind;
- // Load and ByteOffset are set for Memory providers only.
- // Load represents the node which loads the byte from memory.
- // ByteOffset is the offset of the byte in the value produced by the load.
- LoadSDNode *Load;
- unsigned ByteOffset;
-
- ByteProvider() : Kind(ProviderTy::Unknown), Load(nullptr), ByteOffset(0) {}
-
- static ByteProvider getUnknown() {
- return ByteProvider(ProviderTy::Unknown, nullptr, 0);
- }
- static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
- return ByteProvider(ProviderTy::Memory, Load, ByteOffset);
- }
- static ByteProvider getZero() {
- return ByteProvider(ProviderTy::ZeroConstant, nullptr, 0);
- }
-
- bool operator==(const ByteProvider &Other) const {
- return Other.Kind == Kind && Other.Load == Load &&
- Other.ByteOffset == ByteOffset;
- }
-
-private:
- ByteProvider(ProviderTy Kind, LoadSDNode *Load, unsigned ByteOffset)
- : Kind(Kind), Load(Load), ByteOffset(ByteOffset) {}
-};
-
-/// Recursively traverses the expression, collecting the origin of individual
-/// bytes of the given value. For every value except the root of the
-/// expression, it also verifies that the value has no uses outside of the
-/// expression.
-Optional<SmallVector<ByteProvider, 4>>
-collectByteProviders(SDValue Op, bool CheckNumberOfUses = false) {
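- // Every value inside the pattern (i.e. everything except the root) must
- // have a single use; otherwise the narrow loads would stay live after the
- // fold and nothing would be saved.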
- if (CheckNumberOfUses && !Op.hasOneUse())
- return None;
-
- unsigned BitWidth = Op.getScalarValueSizeInBits();
- if (BitWidth % 8 != 0)
- return None;
- unsigned ByteWidth = BitWidth / 8;
-
- switch (Op.getOpcode()) {
- case ISD::OR: {
- auto LHS = collectByteProviders(Op->getOperand(0),
- /*CheckNumberOfUses=*/true);
- auto RHS = collectByteProviders(Op->getOperand(1),
- /*CheckNumberOfUses=*/true);
- if (!LHS || !RHS)
- return None;
-
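- // Merge the providers of a single byte of (LHS | RHS): ORing a memory byte
- // with a known zero byte yields the memory byte, matching providers are
- // kept as-is, and any other combination makes the byte unknown.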
- auto OR = [](ByteProvider LHS, ByteProvider RHS) {
- if (LHS == RHS)
- return LHS;
- if (LHS.Kind == ByteProvider::Unknown ||
- RHS.Kind == ByteProvider::Unknown)
- return ByteProvider::getUnknown();
- if (LHS.Kind == ByteProvider::Memory && RHS.Kind == ByteProvider::Memory)
- return ByteProvider::getUnknown();
-
- if (LHS.Kind == ByteProvider::Memory)
- return LHS;
- return RHS;
- };
-
- SmallVector<ByteProvider, 4> Result(ByteWidth);
- for (unsigned i = 0; i < LHS->size(); i++)
- Result[i] = OR(LHS.getValue()[i], RHS.getValue()[i]);
-
- return Result;
- }
- case ISD::SHL: {
- auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
- if (!ShiftOp)
- return None;
-
- uint64_t BitShift = ShiftOp->getZExtValue();
- if (BitShift % 8 != 0)
- return None;
- uint64_t ByteShift = BitShift / 8;
-
- auto Original = collectByteProviders(Op->getOperand(0),
- /*CheckNumberOfUses=*/true);
- if (!Original)
- return None;
-
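- // Providers are indexed from the least significant byte, so shifting left
- // by ByteShift bytes prepends that many zero-byte providers and drops the
- // same number from the most significant end.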
- SmallVector<ByteProvider, 4> Result;
- Result.insert(Result.begin(), ByteShift, ByteProvider::getZero());
- Result.insert(Result.end(), Original->begin(),
- std::prev(Original->end(), ByteShift));
- assert(Result.size() == ByteWidth &&
- "Computed width doesn't match from type width");
- return Result;
- }
- case ISD::ZERO_EXTEND: {
- auto Original = collectByteProviders(Op->getOperand(0),
- /*CheckNumberOfUses=*/true);
- if (!Original)
- return None;
-
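- // Zero extension appends known-zero bytes above the providers of the
- // narrow value.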
- SmallVector<ByteProvider, 4> Result;
- unsigned NarrowByteWidth = Original->size();
- Result.insert(Result.begin(), Original->begin(), Original->end());
- Result.insert(Result.end(), ByteWidth - NarrowByteWidth,
- ByteProvider::getZero());
- assert(Result.size() == ByteWidth &&
- "Computed width doesn't match from type width");
- return Result;
- }
- case ISD::LOAD: {
- auto L = cast<LoadSDNode>(Op.getNode());
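- // Only simple loads can take part in the fold: volatile, indexed, and
- // extending loads can't be merged into a wider load without changing
- // semantics.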
- if (L->isVolatile() || L->isIndexed() ||
- L->getExtensionType() != ISD::NON_EXTLOAD)
- return None;
-
- assert(BitWidth == L->getMemoryVT().getSizeInBits() &&
- "For non-extend loads widths must be the same");
-
- SmallVector<ByteProvider, 4> Result(ByteWidth);
- for (unsigned i = 0; i < ByteWidth; i++)
- Result[i] = ByteProvider::getMemory(L, i);
-
- return Result;
- }
- }
-
- return None;
-}
-} // namespace
-
-/// Match a pattern where a wide type scalar value is loaded by several narrow
-/// loads and combined with shifts and ORs. Fold it into a single load or a
-/// load and a BSWAP if the target supports it.
-///
-/// Assuming little endian target:
-/// i8 *a = ...
-/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
-/// =>
-/// i32 val = *((i32)a)
-///
-/// i8 *a = ...
-/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
-/// =>
-/// i32 val = BSWAP(*((i32)a))
-SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
- assert(N->getOpcode() == ISD::OR &&
- "Can only match load combining against OR nodes");
-
- // Handles simple types only
- EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- // There is nothing to do here if the target can't load a value of this type
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isOperationLegal(ISD::LOAD, VT))
- return SDValue();
-
- // Calculate byte providers for the OR we are looking at
- auto Res = collectByteProviders(SDValue(N, 0));
- if (!Res)
- return SDValue();
- auto &Bytes = Res.getValue();
- unsigned ByteWidth = Bytes.size();
- assert(VT.getSizeInBits() == ByteWidth * 8 &&
- "collectByteProviders computed width differs from type width");
-
- auto LittleEndianByteAt = [](unsigned BW, unsigned i) { return i; };
- auto BigEndianByteAt = [](unsigned BW, unsigned i) { return BW - i - 1; };
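- // For example, in a 4-byte value, byte 0 (the least significant one) comes
- // from memory offset 0 for a little endian load but from offset 3 for a
- // big endian one.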
-
- Optional<BaseIndexOffset> Base;
- SDValue Chain;
-
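- // The narrow loads feeding the pattern, and the load that provides the byte
- // at the base address; the latter's pointer becomes the address of the
- // combined load.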
- SmallSet<LoadSDNode *, 8> Loads;
- LoadSDNode *FirstLoad = nullptr;
-
- // Check if all the bytes of the OR we are looking at are loaded from the same
- // base address. Collect the byte offsets from the Base address in ByteOffsets.
- SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
- for (unsigned i = 0; i < ByteWidth; i++) {
- // All the bytes must be loaded from memory
- if (Bytes[i].Kind != ByteProvider::Memory)
- return SDValue();
-
- LoadSDNode *L = Bytes[i].Load;
- assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
- (L->getExtensionType() == ISD::NON_EXTLOAD) &&
- "Must be enforced by collectByteProviders");
- assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
-
- // All loads must share the same chain
- SDValue LChain = L->getChain();
- if (!Chain)
- Chain = LChain;
- if (Chain != LChain)
- return SDValue();
-
- // Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
- if (!Base)
- Base = Ptr;
- if (!Base->equalBaseIndex(Ptr))
- return SDValue();
-
- // Calculate the offset of the current byte from the base address
- unsigned LoadBitWidth = L->getMemoryVT().getSizeInBits();
- assert(LoadBitWidth % 8 == 0 &&
- "can only analyze providers for individual bytes not bit");
- unsigned LoadByteWidth = LoadBitWidth / 8;
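- // Translate the offset of the byte within the loaded value into an offset
- // within memory: for a big endian load, the least significant byte of the
- // value sits at the highest address covered by that load.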
- int64_t MemoryByteOffset =
- DAG.getDataLayout().isBigEndian()
- ? BigEndianByteAt(LoadByteWidth, Bytes[i].ByteOffset)
- : LittleEndianByteAt(LoadByteWidth, Bytes[i].ByteOffset);
- int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset;
- ByteOffsets[i] = ByteOffsetFromBase;
-
- // Remember the first byte load
- if (ByteOffsetFromBase == 0)
- FirstLoad = L;
-
- Loads.insert(L);
- }
- assert(Base && "must be set");
- assert(Loads.size() > 0 && "must be at least one load");
-
- // Check if the bytes of the OR we are looking at match either a big or a
- // little endian value load
- bool BigEndian = true, LittleEndian = true;
- for (unsigned i = 0; i < ByteWidth; i++) {
- LittleEndian &= ByteOffsets[i] == LittleEndianByteAt(ByteWidth, i);
- BigEndian &= ByteOffsets[i] == BigEndianByteAt(ByteWidth, i);
- if (!BigEndian && !LittleEndian)
- return SDValue();
- }
- assert((BigEndian != LittleEndian) && "should be either big or little endian");
- assert(FirstLoad && "must be set");
-
- // The node we are looking at matches the pattern; check whether we can
- // replace it with a single load and a bswap if needed.
-
- // If the load needs a byte swap, check that the target supports it
- bool NeedsBswap = DAG.getDataLayout().isBigEndian() != BigEndian;
- if (NeedsBswap && !TLI.isOperationLegal(ISD::BSWAP, VT))
- return SDValue();
-
- // Check that a load of the wide type is both allowed and fast on the target
- bool Fast = false;
- bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, FirstLoad->getAddressSpace(),
- FirstLoad->getAlignment(), &Fast);
- if (!Allowed || !Fast)
- return SDValue();
-
- SDValue NewLoad =
- DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
-
- // Transfer chain users from old loads to the new load.
- for (LoadSDNode *L : Loads)
- DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
-
- if (NeedsBswap)
- return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad);
- return NewLoad;
-}
-
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+++ /dev/null
-; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s
-; RUN: llc < %s -mtriple=arm64eb-unknown | FileCheck %s --check-prefix=CHECK64
-
-; i8* p; // p is 4 byte aligned
-; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
-define i32 @load_i32_by_i8_big_endian(i32*) {
-; CHECK-LABEL: load_i32_by_i8_big_endian:
-; CHECK: ldr r0, [r0]
-; CHECK-NEXT: mov pc, lr
-
-; CHECK64-LABEL: load_i32_by_i8_big_endian:
-; CHECK64: ldr w0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 4
- %4 = zext i8 %3 to i32
- %5 = shl nuw nsw i32 %4, 24
- %6 = getelementptr inbounds i8, i8* %2, i32 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 16
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 8
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = or i32 %15, %18
- ret i32 %19
-}
-
-; i8* p; // p is 4 byte aligned
-; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
-define i32 @load_i32_by_i16_by_i8_big_endian(i32*) {
-; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
-; CHECK: ldr r0, [r0]
-; CHECK-NEXT: mov pc, lr
-
-; CHECK64-LABEL: load_i32_by_i16_by_i8_big_endian:
-; CHECK64: ldr w0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 4
- %4 = zext i8 %3 to i16
- %5 = getelementptr inbounds i8, i8* %2, i32 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i16
- %8 = shl nuw nsw i16 %4, 8
- %9 = or i16 %8, %7
- %10 = getelementptr inbounds i8, i8* %2, i32 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i16
- %13 = getelementptr inbounds i8, i8* %2, i32 3
- %14 = load i8, i8* %13, align 1
- %15 = zext i8 %14 to i16
- %16 = shl nuw nsw i16 %12, 8
- %17 = or i16 %16, %15
- %18 = zext i16 %9 to i32
- %19 = zext i16 %17 to i32
- %20 = shl nuw nsw i32 %18, 16
- %21 = or i32 %20, %19
- ret i32 %21
-}
-
-; i16* p; // p is 4 byte aligned
-; ((i32) p[0] << 16) | (i32) p[1]
-define i32 @load_i32_by_i16(i32*) {
-; CHECK-LABEL: load_i32_by_i16:
-; CHECK: ldr r0, [r0]
-; CHECK-NEXT: mov pc, lr
-
-; CHECK64-LABEL: load_i32_by_i16:
-; CHECK64: ldr w0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i32* %0 to i16*
- %3 = load i16, i16* %2, align 4
- %4 = zext i16 %3 to i32
- %5 = getelementptr inbounds i16, i16* %2, i32 1
- %6 = load i16, i16* %5, align 1
- %7 = zext i16 %6 to i32
- %8 = shl nuw nsw i32 %4, 16
- %9 = or i32 %8, %7
- ret i32 %9
-}
-
-; i16* p_16; // p_16 is 4 byte aligned
-; i8* p_8 = (i8*) p_16;
-; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
-define i32 @load_i32_by_i16_i8(i32*) {
-; CHECK-LABEL: load_i32_by_i16_i8:
-; CHECK: ldr r0, [r0]
-; CHECK-NEXT: mov pc, lr
-
-; CHECK64-LABEL: load_i32_by_i16_i8:
-; CHECK64: ldr w0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i32* %0 to i16*
- %3 = bitcast i32* %0 to i8*
- %4 = load i16, i16* %2, align 4
- %5 = zext i16 %4 to i32
- %6 = shl nuw nsw i32 %5, 16
- %7 = getelementptr inbounds i8, i8* %3, i32 2
- %8 = load i8, i8* %7, align 1
- %9 = zext i8 %8 to i32
- %10 = shl nuw nsw i32 %9, 8
- %11 = getelementptr inbounds i8, i8* %3, i32 3
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = or i32 %10, %13
- %15 = or i32 %14, %6
- ret i32 %15
-}
-
-; i8* p; // p is 8 byte aligned
-; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
-define i64 @load_i64_by_i8_bswap(i64*) {
-; CHECK-LABEL: load_i64_by_i8_bswap:
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: orr
-; CHECK: mov pc, lr
-
-; CHECK64-LABEL: load_i64_by_i8_bswap:
-; CHECK64: ldr x8, [x0]
-; CHECK64-NEXT: rev x0, x8
-; CHECK64-NEXT: ret
- %2 = bitcast i64* %0 to i8*
- %3 = load i8, i8* %2, align 8
- %4 = zext i8 %3 to i64
- %5 = getelementptr inbounds i8, i8* %2, i64 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i64
- %8 = shl nuw nsw i64 %7, 8
- %9 = or i64 %8, %4
- %10 = getelementptr inbounds i8, i8* %2, i64 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i64
- %13 = shl nuw nsw i64 %12, 16
- %14 = or i64 %9, %13
- %15 = getelementptr inbounds i8, i8* %2, i64 3
- %16 = load i8, i8* %15, align 1
- %17 = zext i8 %16 to i64
- %18 = shl nuw nsw i64 %17, 24
- %19 = or i64 %14, %18
- %20 = getelementptr inbounds i8, i8* %2, i64 4
- %21 = load i8, i8* %20, align 1
- %22 = zext i8 %21 to i64
- %23 = shl nuw nsw i64 %22, 32
- %24 = or i64 %19, %23
- %25 = getelementptr inbounds i8, i8* %2, i64 5
- %26 = load i8, i8* %25, align 1
- %27 = zext i8 %26 to i64
- %28 = shl nuw nsw i64 %27, 40
- %29 = or i64 %24, %28
- %30 = getelementptr inbounds i8, i8* %2, i64 6
- %31 = load i8, i8* %30, align 1
- %32 = zext i8 %31 to i64
- %33 = shl nuw nsw i64 %32, 48
- %34 = or i64 %29, %33
- %35 = getelementptr inbounds i8, i8* %2, i64 7
- %36 = load i8, i8* %35, align 1
- %37 = zext i8 %36 to i64
- %38 = shl nuw i64 %37, 56
- %39 = or i64 %34, %38
- ret i64 %39
-}
-
-; i8* p; // p is 8 byte aligned
-; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
-define i64 @load_i64_by_i8(i64*) {
-; CHECK-LABEL: load_i64_by_i8:
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: orr
-; CHECK: mov pc, lr
-
-; CHECK64-LABEL: load_i64_by_i8:
-; CHECK64: ldr x0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i64* %0 to i8*
- %3 = load i8, i8* %2, align 8
- %4 = zext i8 %3 to i64
- %5 = shl nuw i64 %4, 56
- %6 = getelementptr inbounds i8, i8* %2, i64 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i64
- %9 = shl nuw nsw i64 %8, 48
- %10 = or i64 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i64 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i64
- %14 = shl nuw nsw i64 %13, 40
- %15 = or i64 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i64 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i64
- %19 = shl nuw nsw i64 %18, 32
- %20 = or i64 %15, %19
- %21 = getelementptr inbounds i8, i8* %2, i64 4
- %22 = load i8, i8* %21, align 1
- %23 = zext i8 %22 to i64
- %24 = shl nuw nsw i64 %23, 24
- %25 = or i64 %20, %24
- %26 = getelementptr inbounds i8, i8* %2, i64 5
- %27 = load i8, i8* %26, align 1
- %28 = zext i8 %27 to i64
- %29 = shl nuw nsw i64 %28, 16
- %30 = or i64 %25, %29
- %31 = getelementptr inbounds i8, i8* %2, i64 6
- %32 = load i8, i8* %31, align 1
- %33 = zext i8 %32 to i64
- %34 = shl nuw nsw i64 %33, 8
- %35 = or i64 %30, %34
- %36 = getelementptr inbounds i8, i8* %2, i64 7
- %37 = load i8, i8* %36, align 1
- %38 = zext i8 %37 to i64
- %39 = or i64 %35, %38
- ret i64 %39
-}
+++ /dev/null
-; RUN: llc < %s -mtriple=arm-unknown | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s --check-prefix=CHECK64
-
-; i8* p; // p is 1 byte aligned
-; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
-define i32 @load_i32_by_i8_unaligned(i32*) {
-; CHECK-LABEL: load_i32_by_i8_unaligned:
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: orr
-; CHECK: mov pc, lr
-
-; CHECK64-LABEL: load_i32_by_i8_unaligned:
-; CHECK64: ldr w0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i32* %0 to i8*
- %3 = getelementptr inbounds i8, i8* %2, i32 0
- %4 = load i8, i8* %2, align 1
- %5 = zext i8 %4 to i32
- %6 = getelementptr inbounds i8, i8* %2, i32 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 8
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 16
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = shl nuw nsw i32 %18, 24
- %20 = or i32 %15, %19
- ret i32 %20
-}
-
-; i8* p; // p is 4 byte aligned
-; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
-define i32 @load_i32_by_i8_aligned(i32*) {
-; CHECK-LABEL: load_i32_by_i8_aligned:
-; CHECK: ldr r0, [r0]
-; CHECK: mov pc, lr
-
-; CHECK64-LABEL: load_i32_by_i8_aligned:
-; CHECK64: ldr w0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i32* %0 to i8*
- %3 = getelementptr inbounds i8, i8* %2, i32 0
- %4 = load i8, i8* %2, align 4
- %5 = zext i8 %4 to i32
- %6 = getelementptr inbounds i8, i8* %2, i32 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 8
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 16
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = shl nuw nsw i32 %18, 24
- %20 = or i32 %15, %19
- ret i32 %20
-}
-
-; i8* p; // p is 4 byte aligned
-; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
-define i32 @load_i32_by_i8_bswap(i32*) {
-; BSWAP is not supported by the 32-bit target
-; CHECK-LABEL: load_i32_by_i8_bswap:
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: orr
-; CHECK: mov pc, lr
-
-; CHECK64-LABEL: load_i32_by_i8_bswap:
-; CHECK64: ldr w8, [x0]
-; CHECK64-NEXT: rev w0, w8
-; CHECK64-NEXT: ret
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 4
- %4 = zext i8 %3 to i32
- %5 = shl nuw nsw i32 %4, 24
- %6 = getelementptr inbounds i8, i8* %2, i32 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 16
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 8
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = or i32 %15, %18
- ret i32 %19
-}
-
-; i8* p; // p is 8 byte aligned
-; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
-define i64 @load_i64_by_i8(i64*) {
-; CHECK-LABEL: load_i64_by_i8:
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: orr
-; CHECK: mov pc, lr
-
-; CHECK64-LABEL: load_i64_by_i8:
-; CHECK64: ldr x0, [x0]
-; CHECK64-NEXT: ret
- %2 = bitcast i64* %0 to i8*
- %3 = load i8, i8* %2, align 8
- %4 = zext i8 %3 to i64
- %5 = getelementptr inbounds i8, i8* %2, i64 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i64
- %8 = shl nuw nsw i64 %7, 8
- %9 = or i64 %8, %4
- %10 = getelementptr inbounds i8, i8* %2, i64 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i64
- %13 = shl nuw nsw i64 %12, 16
- %14 = or i64 %9, %13
- %15 = getelementptr inbounds i8, i8* %2, i64 3
- %16 = load i8, i8* %15, align 1
- %17 = zext i8 %16 to i64
- %18 = shl nuw nsw i64 %17, 24
- %19 = or i64 %14, %18
- %20 = getelementptr inbounds i8, i8* %2, i64 4
- %21 = load i8, i8* %20, align 1
- %22 = zext i8 %21 to i64
- %23 = shl nuw nsw i64 %22, 32
- %24 = or i64 %19, %23
- %25 = getelementptr inbounds i8, i8* %2, i64 5
- %26 = load i8, i8* %25, align 1
- %27 = zext i8 %26 to i64
- %28 = shl nuw nsw i64 %27, 40
- %29 = or i64 %24, %28
- %30 = getelementptr inbounds i8, i8* %2, i64 6
- %31 = load i8, i8* %30, align 1
- %32 = zext i8 %31 to i64
- %33 = shl nuw nsw i64 %32, 48
- %34 = or i64 %29, %33
- %35 = getelementptr inbounds i8, i8* %2, i64 7
- %36 = load i8, i8* %35, align 1
- %37 = zext i8 %36 to i64
- %38 = shl nuw i64 %37, 56
- %39 = or i64 %34, %38
- ret i64 %39
-}
-
-; i8* p; // p is 8 byte aligned
-; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
-define i64 @load_i64_by_i8_bswap(i64*) {
-; CHECK-LABEL: load_i64_by_i8_bswap:
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: ldrb{{.*}}r0
-; CHECK: orr
-; CHECK: mov pc, lr
-
-; CHECK64-LABEL: load_i64_by_i8_bswap:
-; CHECK64: ldr x8, [x0]
-; CHECK64-NEXT: rev x0, x8
-; CHECK64-NEXT: ret
- %2 = bitcast i64* %0 to i8*
- %3 = load i8, i8* %2, align 8
- %4 = zext i8 %3 to i64
- %5 = shl nuw i64 %4, 56
- %6 = getelementptr inbounds i8, i8* %2, i64 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i64
- %9 = shl nuw nsw i64 %8, 48
- %10 = or i64 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i64 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i64
- %14 = shl nuw nsw i64 %13, 40
- %15 = or i64 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i64 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i64
- %19 = shl nuw nsw i64 %18, 32
- %20 = or i64 %15, %19
- %21 = getelementptr inbounds i8, i8* %2, i64 4
- %22 = load i8, i8* %21, align 1
- %23 = zext i8 %22 to i64
- %24 = shl nuw nsw i64 %23, 24
- %25 = or i64 %20, %24
- %26 = getelementptr inbounds i8, i8* %2, i64 5
- %27 = load i8, i8* %26, align 1
- %28 = zext i8 %27 to i64
- %29 = shl nuw nsw i64 %28, 16
- %30 = or i64 %25, %29
- %31 = getelementptr inbounds i8, i8* %2, i64 6
- %32 = load i8, i8* %31, align 1
- %33 = zext i8 %32 to i64
- %34 = shl nuw nsw i64 %33, 8
- %35 = or i64 %30, %34
- %36 = getelementptr inbounds i8, i8* %2, i64 7
- %37 = load i8, i8* %36, align 1
- %38 = zext i8 %37 to i64
- %39 = or i64 %35, %38
- ret i64 %39
-}
+++ /dev/null
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
-
-; i8* p;
-; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
-define i32 @load_i32_by_i8(i32*) {
-; CHECK-LABEL: load_i32_by_i8:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movl (%rdi), %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 1
- %4 = zext i8 %3 to i32
- %5 = getelementptr inbounds i8, i8* %2, i32 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i32
- %8 = shl nuw nsw i32 %7, 8
- %9 = or i32 %8, %4
- %10 = getelementptr inbounds i8, i8* %2, i32 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i32
- %13 = shl nuw nsw i32 %12, 16
- %14 = or i32 %9, %13
- %15 = getelementptr inbounds i8, i8* %2, i32 3
- %16 = load i8, i8* %15, align 1
- %17 = zext i8 %16 to i32
- %18 = shl nuw nsw i32 %17, 24
- %19 = or i32 %14, %18
- ret i32 %19
-}
-
-; i8* p;
-; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
-define i32 @load_i32_by_i8_bswap(i32*) {
-; CHECK-LABEL: load_i32_by_i8_bswap:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8_bswap:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movl (%rdi), %eax
-; CHECK64-NEXT: bswapl %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 1
- %4 = zext i8 %3 to i32
- %5 = shl nuw nsw i32 %4, 24
- %6 = getelementptr inbounds i8, i8* %2, i32 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 16
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 8
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = or i32 %15, %18
- ret i32 %19
-}
-
-; i16* p;
-; (i32) p[0] | ((i32) p[1] << 16)
-define i32 @load_i32_by_i16(i32*) {
-; CHECK-LABEL: load_i32_by_i16:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i16:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movl (%rdi), %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i16*
- %3 = load i16, i16* %2, align 1
- %4 = zext i16 %3 to i32
- %5 = getelementptr inbounds i16, i16* %2, i32 1
- %6 = load i16, i16* %5, align 1
- %7 = zext i16 %6 to i32
- %8 = shl nuw nsw i32 %7, 16
- %9 = or i32 %8, %4
- ret i32 %9
-}
-
-; i16* p_16;
-; i8* p_8 = (i8*) p_16;
-; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
-define i32 @load_i32_by_i16_i8(i32*) {
-; CHECK-LABEL: load_i32_by_i16_i8:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i16_i8:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movl (%rdi), %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i16*
- %3 = bitcast i32* %0 to i8*
- %4 = load i16, i16* %2, align 1
- %5 = zext i16 %4 to i32
- %6 = getelementptr inbounds i8, i8* %3, i32 2
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 16
- %10 = getelementptr inbounds i8, i8* %3, i32 3
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i32
- %13 = shl nuw nsw i32 %12, 24
- %14 = or i32 %9, %13
- %15 = or i32 %14, %5
- ret i32 %15
-}
-
-
-; i8* p;
-; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
-define i32 @load_i32_by_i16_by_i8(i32*) {
-; CHECK-LABEL: load_i32_by_i16_by_i8:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i16_by_i8:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movl (%rdi), %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 1
- %4 = zext i8 %3 to i16
- %5 = getelementptr inbounds i8, i8* %2, i32 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i16
- %8 = shl nuw nsw i16 %7, 8
- %9 = or i16 %8, %4
- %10 = getelementptr inbounds i8, i8* %2, i32 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i16
- %13 = getelementptr inbounds i8, i8* %2, i32 3
- %14 = load i8, i8* %13, align 1
- %15 = zext i8 %14 to i16
- %16 = shl nuw nsw i16 %15, 8
- %17 = or i16 %16, %12
- %18 = zext i16 %9 to i32
- %19 = zext i16 %17 to i32
- %20 = shl nuw nsw i32 %19, 16
- %21 = or i32 %20, %18
- ret i32 %21
-}
-
-; i8* p;
-; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
-define i32 @load_i32_by_i16_by_i8_bswap(i32*) {
-; CHECK-LABEL: load_i32_by_i16_by_i8_bswap:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i16_by_i8_bswap:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movl (%rdi), %eax
-; CHECK64-NEXT: bswapl %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 1
- %4 = zext i8 %3 to i16
- %5 = getelementptr inbounds i8, i8* %2, i32 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i16
- %8 = shl nuw nsw i16 %4, 8
- %9 = or i16 %8, %7
- %10 = getelementptr inbounds i8, i8* %2, i32 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i16
- %13 = getelementptr inbounds i8, i8* %2, i32 3
- %14 = load i8, i8* %13, align 1
- %15 = zext i8 %14 to i16
- %16 = shl nuw nsw i16 %12, 8
- %17 = or i16 %16, %15
- %18 = zext i16 %9 to i32
- %19 = zext i16 %17 to i32
- %20 = shl nuw nsw i32 %18, 16
- %21 = or i32 %20, %19
- ret i32 %21
-}
-
-; i8* p;
-; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
-define i64 @load_i64_by_i8(i64*) {
-; CHECK-LABEL: load_i64_by_i8:
-; CHECK: # BB#0:
-; CHECK-NEXT: pushl %edi
-; CHECK-NEXT: .Lcfi0:
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: .Lcfi1:
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .Lcfi2:
-; CHECK-NEXT: .cfi_offset %esi, -12
-; CHECK-NEXT: .Lcfi3:
-; CHECK-NEXT: .cfi_offset %edi, -8
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movzbl (%ecx), %eax
-; CHECK-NEXT: movzbl 1(%ecx), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %eax, %edx
-; CHECK-NEXT: movzbl 2(%ecx), %esi
-; CHECK-NEXT: shll $16, %esi
-; CHECK-NEXT: orl %edx, %esi
-; CHECK-NEXT: movzbl 3(%ecx), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %esi, %eax
-; CHECK-NEXT: movzbl 4(%ecx), %edx
-; CHECK-NEXT: movzbl 5(%ecx), %esi
-; CHECK-NEXT: shll $8, %esi
-; CHECK-NEXT: orl %edx, %esi
-; CHECK-NEXT: movzbl 6(%ecx), %edi
-; CHECK-NEXT: shll $16, %edi
-; CHECK-NEXT: orl %esi, %edi
-; CHECK-NEXT: movzbl 7(%ecx), %edx
-; CHECK-NEXT: shll $24, %edx
-; CHECK-NEXT: orl %edi, %edx
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: popl %edi
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i64_by_i8:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movq (%rdi), %rax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i64* %0 to i8*
- %3 = load i8, i8* %2, align 1
- %4 = zext i8 %3 to i64
- %5 = getelementptr inbounds i8, i8* %2, i64 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i64
- %8 = shl nuw nsw i64 %7, 8
- %9 = or i64 %8, %4
- %10 = getelementptr inbounds i8, i8* %2, i64 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i64
- %13 = shl nuw nsw i64 %12, 16
- %14 = or i64 %9, %13
- %15 = getelementptr inbounds i8, i8* %2, i64 3
- %16 = load i8, i8* %15, align 1
- %17 = zext i8 %16 to i64
- %18 = shl nuw nsw i64 %17, 24
- %19 = or i64 %14, %18
- %20 = getelementptr inbounds i8, i8* %2, i64 4
- %21 = load i8, i8* %20, align 1
- %22 = zext i8 %21 to i64
- %23 = shl nuw nsw i64 %22, 32
- %24 = or i64 %19, %23
- %25 = getelementptr inbounds i8, i8* %2, i64 5
- %26 = load i8, i8* %25, align 1
- %27 = zext i8 %26 to i64
- %28 = shl nuw nsw i64 %27, 40
- %29 = or i64 %24, %28
- %30 = getelementptr inbounds i8, i8* %2, i64 6
- %31 = load i8, i8* %30, align 1
- %32 = zext i8 %31 to i64
- %33 = shl nuw nsw i64 %32, 48
- %34 = or i64 %29, %33
- %35 = getelementptr inbounds i8, i8* %2, i64 7
- %36 = load i8, i8* %35, align 1
- %37 = zext i8 %36 to i64
- %38 = shl nuw i64 %37, 56
- %39 = or i64 %34, %38
- ret i64 %39
-}
-
-; i8* p;
-; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
-define i64 @load_i64_by_i8_bswap(i64*) {
-; CHECK-LABEL: load_i64_by_i8_bswap:
-; CHECK: # BB#0:
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: .Lcfi4:
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .Lcfi5:
-; CHECK-NEXT: .cfi_offset %esi, -8
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl (%eax), %ecx
-; CHECK-NEXT: shll $24, %ecx
-; CHECK-NEXT: movzbl 1(%eax), %edx
-; CHECK-NEXT: shll $16, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 2(%eax), %ecx
-; CHECK-NEXT: shll $8, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl 3(%eax), %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 4(%eax), %ecx
-; CHECK-NEXT: shll $24, %ecx
-; CHECK-NEXT: movzbl 5(%eax), %esi
-; CHECK-NEXT: shll $16, %esi
-; CHECK-NEXT: orl %ecx, %esi
-; CHECK-NEXT: movzbl 6(%eax), %ecx
-; CHECK-NEXT: shll $8, %ecx
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: movzbl 7(%eax), %eax
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i64_by_i8_bswap:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movq (%rdi), %rax
-; CHECK64-NEXT: bswapq %rax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i64* %0 to i8*
- %3 = load i8, i8* %2, align 1
- %4 = zext i8 %3 to i64
- %5 = shl nuw i64 %4, 56
- %6 = getelementptr inbounds i8, i8* %2, i64 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i64
- %9 = shl nuw nsw i64 %8, 48
- %10 = or i64 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i64 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i64
- %14 = shl nuw nsw i64 %13, 40
- %15 = or i64 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i64 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i64
- %19 = shl nuw nsw i64 %18, 32
- %20 = or i64 %15, %19
- %21 = getelementptr inbounds i8, i8* %2, i64 4
- %22 = load i8, i8* %21, align 1
- %23 = zext i8 %22 to i64
- %24 = shl nuw nsw i64 %23, 24
- %25 = or i64 %20, %24
- %26 = getelementptr inbounds i8, i8* %2, i64 5
- %27 = load i8, i8* %26, align 1
- %28 = zext i8 %27 to i64
- %29 = shl nuw nsw i64 %28, 16
- %30 = or i64 %25, %29
- %31 = getelementptr inbounds i8, i8* %2, i64 6
- %32 = load i8, i8* %31, align 1
- %33 = zext i8 %32 to i64
- %34 = shl nuw nsw i64 %33, 8
- %35 = or i64 %30, %34
- %36 = getelementptr inbounds i8, i8* %2, i64 7
- %37 = load i8, i8* %36, align 1
- %38 = zext i8 %37 to i64
- %39 = or i64 %35, %38
- ret i64 %39
-}
-
-; Part of the load-by-bytes pattern is used outside of the pattern
-; i8* p;
-; i32 x = (i32) p[1]
-; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
-; x | res
-define i32 @load_i32_by_i8_bswap_uses(i32*) {
-; CHECK-LABEL: load_i32_by_i8_bswap_uses:
-; CHECK: # BB#0:
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: .Lcfi6:
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .Lcfi7:
-; CHECK-NEXT: .cfi_offset %esi, -8
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl (%eax), %ecx
-; CHECK-NEXT: shll $24, %ecx
-; CHECK-NEXT: movzbl 1(%eax), %edx
-; CHECK-NEXT: movl %edx, %esi
-; CHECK-NEXT: shll $16, %esi
-; CHECK-NEXT: orl %ecx, %esi
-; CHECK-NEXT: movzbl 2(%eax), %ecx
-; CHECK-NEXT: shll $8, %ecx
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: movzbl 3(%eax), %eax
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: orl %edx, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl (%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: movzbl 1(%rdi), %ecx
-; CHECK64-NEXT: movl %ecx, %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %eax, %edx
-; CHECK64-NEXT: movzbl 2(%rdi), %esi
-; CHECK64-NEXT: shll $8, %esi
-; CHECK64-NEXT: orl %edx, %esi
-; CHECK64-NEXT: movzbl 3(%rdi), %eax
-; CHECK64-NEXT: orl %esi, %eax
-; CHECK64-NEXT: orl %ecx, %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i8*
- %3 = load i8, i8* %2, align 1
- %4 = zext i8 %3 to i32
- %5 = shl nuw nsw i32 %4, 24
- %6 = getelementptr inbounds i8, i8* %2, i32 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 16
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 8
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = or i32 %15, %18
- ; Use individual part of the pattern outside of the pattern
- %20 = or i32 %8, %19
- ret i32 %20
-}
-
-; One of the loads is volatile
-; i8* p;
-; i8 p0 = *((volatile i8*) p);
-; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
-define i32 @load_i32_by_i8_bswap_volatile(i32*) {
-; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl (%eax), %ecx
-; CHECK-NEXT: shll $24, %ecx
-; CHECK-NEXT: movzbl 1(%eax), %edx
-; CHECK-NEXT: shll $16, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 2(%eax), %ecx
-; CHECK-NEXT: shll $8, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl 3(%eax), %eax
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl (%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: movzbl 1(%rdi), %ecx
-; CHECK64-NEXT: shll $16, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 2(%rdi), %edx
-; CHECK64-NEXT: shll $8, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 3(%rdi), %eax
-; CHECK64-NEXT: orl %edx, %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i8*
- %3 = load volatile i8, i8* %2, align 1
- %4 = zext i8 %3 to i32
- %5 = shl nuw nsw i32 %4, 24
- %6 = getelementptr inbounds i8, i8* %2, i32 1
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 16
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 2
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 8
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 3
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = or i32 %15, %18
- ret i32 %19
-}
-
-; There is a store in between individual loads
-; i8* p, q;
-; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
-; *q = 0;
-; res2 = ((i32) p[2] << 8) | (i32) p[3]
-; res1 | res2
-define i32 @load_i32_by_i8_bswap_store_in_between(i32*, i32*) {
-; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
-; CHECK: # BB#0:
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: .Lcfi8:
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .Lcfi9:
-; CHECK-NEXT: .cfi_offset %esi, -8
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movzbl (%ecx), %edx
-; CHECK-NEXT: shll $24, %edx
-; CHECK-NEXT: movzbl 1(%ecx), %esi
-; CHECK-NEXT: movl $0, (%eax)
-; CHECK-NEXT: shll $16, %esi
-; CHECK-NEXT: orl %edx, %esi
-; CHECK-NEXT: movzbl 2(%ecx), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %esi, %edx
-; CHECK-NEXT: movzbl 3(%ecx), %eax
-; CHECK-NEXT: orl %edx, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl (%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: movzbl 1(%rdi), %ecx
-; CHECK64-NEXT: movl $0, (%rsi)
-; CHECK64-NEXT: shll $16, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 2(%rdi), %edx
-; CHECK64-NEXT: shll $8, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 3(%rdi), %eax
-; CHECK64-NEXT: orl %edx, %eax
-; CHECK64-NEXT: retq
-
- %3 = bitcast i32* %0 to i8*
- %4 = load i8, i8* %3, align 1
- %5 = zext i8 %4 to i32
- %6 = shl nuw nsw i32 %5, 24
- %7 = getelementptr inbounds i8, i8* %3, i32 1
- %8 = load i8, i8* %7, align 1
- ; This store will prevent folding of the pattern
- store i32 0, i32* %1
- %9 = zext i8 %8 to i32
- %10 = shl nuw nsw i32 %9, 16
- %11 = or i32 %10, %6
- %12 = getelementptr inbounds i8, i8* %3, i32 2
- %13 = load i8, i8* %12, align 1
- %14 = zext i8 %13 to i32
- %15 = shl nuw nsw i32 %14, 8
- %16 = or i32 %11, %15
- %17 = getelementptr inbounds i8, i8* %3, i32 3
- %18 = load i8, i8* %17, align 1
- %19 = zext i8 %18 to i32
- %20 = or i32 %16, %19
- ret i32 %20
-}
-
-; One of the loads is from an unrelated location
-; i8* p, q;
-; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
-define i32 @load_i32_by_i8_bswap_unrelated_load(i32*, i32*) {
-; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movzbl (%ecx), %edx
-; CHECK-NEXT: shll $24, %edx
-; CHECK-NEXT: movzbl 1(%eax), %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: orl %edx, %eax
-; CHECK-NEXT: movzbl 2(%ecx), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %eax, %edx
-; CHECK-NEXT: movzbl 3(%ecx), %eax
-; CHECK-NEXT: orl %edx, %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl (%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: movzbl 1(%rsi), %ecx
-; CHECK64-NEXT: shll $16, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 2(%rdi), %edx
-; CHECK64-NEXT: shll $8, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 3(%rdi), %eax
-; CHECK64-NEXT: orl %edx, %eax
-; CHECK64-NEXT: retq
-
- %3 = bitcast i32* %0 to i8*
- %4 = bitcast i32* %1 to i8*
- %5 = load i8, i8* %3, align 1
- %6 = zext i8 %5 to i32
- %7 = shl nuw nsw i32 %6, 24
- ; Load from an unrelated address
- %8 = getelementptr inbounds i8, i8* %4, i32 1
- %9 = load i8, i8* %8, align 1
- %10 = zext i8 %9 to i32
- %11 = shl nuw nsw i32 %10, 16
- %12 = or i32 %11, %7
- %13 = getelementptr inbounds i8, i8* %3, i32 2
- %14 = load i8, i8* %13, align 1
- %15 = zext i8 %14 to i32
- %16 = shl nuw nsw i32 %15, 8
- %17 = or i32 %12, %16
- %18 = getelementptr inbounds i8, i8* %3, i32 3
- %19 = load i8, i8* %18, align 1
- %20 = zext i8 %19 to i32
- %21 = or i32 %17, %20
- ret i32 %21
-}
-
-; Non-zero offsets are not supported for now
-; i8* p;
-; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
-define i32 @load_i32_by_i8_unsupported_offset(i32*) {
-; CHECK-LABEL: load_i32_by_i8_unsupported_offset:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl (%eax), %ecx
-; CHECK-NEXT: movzbl 2(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 3(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl 4(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8_unsupported_offset:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl (%rdi), %eax
-; CHECK64-NEXT: movzbl 2(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 3(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 4(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
-; CHECK64-NEXT: retq
-
- %2 = bitcast i32* %0 to i8*
- %3 = getelementptr inbounds i8, i8* %2, i32 1
- %4 = load i8, i8* %2, align 1
- %5 = zext i8 %4 to i32
- %6 = getelementptr inbounds i8, i8* %2, i32 2
- %7 = load i8, i8* %6, align 1
- %8 = zext i8 %7 to i32
- %9 = shl nuw nsw i32 %8, 8
- %10 = or i32 %9, %5
- %11 = getelementptr inbounds i8, i8* %2, i32 3
- %12 = load i8, i8* %11, align 1
- %13 = zext i8 %12 to i32
- %14 = shl nuw nsw i32 %13, 16
- %15 = or i32 %10, %14
- %16 = getelementptr inbounds i8, i8* %2, i32 4
- %17 = load i8, i8* %16, align 1
- %18 = zext i8 %17 to i32
- %19 = shl nuw nsw i32 %18, 24
- %20 = or i32 %15, %19
- ret i32 %20
-}
-
-; i8* p; i32 i;
-; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
-define i32 @load_i32_by_i8_bswap_base_index_offset(i32*, i32) {
-; CHECK-LABEL: load_i32_by_i8_bswap_base_index_offset:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl (%ecx,%eax), %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: retl
-;
-; CHECK64-LABEL: load_i32_by_i8_bswap_base_index_offset:
-; CHECK64: # BB#0:
-; CHECK64-NEXT: movslq %esi, %rax
-; CHECK64-NEXT: movzbl (%rdi,%rax), %ecx
-; CHECK64-NEXT: shll $24, %ecx
-; CHECK64-NEXT: movzbl 1(%rdi,%rax), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 2(%rdi,%rax), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %edx, %ecx
-; CHECK64-NEXT: movzbl 3(%rdi,%rax), %eax
-; CHECK64-NEXT: orl %ecx, %eax
-; CHECK64-NEXT: retq
-; Currently we don't fold the pattern for the x86-64 target because we don't
-; recognize that the loads are adjacent: BaseIndexOffset doesn't look through
-; zexts.
-
- %3 = bitcast i32* %0 to i8*
- %4 = getelementptr inbounds i8, i8* %3, i32 %1
- %5 = load i8, i8* %4, align 1
- %6 = zext i8 %5 to i32
- %7 = shl nuw nsw i32 %6, 24
- %8 = add nuw nsw i32 %1, 1
- %9 = getelementptr inbounds i8, i8* %3, i32 %8
- %10 = load i8, i8* %9, align 1
- %11 = zext i8 %10 to i32
- %12 = shl nuw nsw i32 %11, 16
- %13 = or i32 %12, %7
- %14 = add nuw nsw i32 %1, 2
- %15 = getelementptr inbounds i8, i8* %3, i32 %14
- %16 = load i8, i8* %15, align 1
- %17 = zext i8 %16 to i32
- %18 = shl nuw nsw i32 %17, 8
- %19 = or i32 %13, %18
- %20 = add nuw nsw i32 %1, 3
- %21 = getelementptr inbounds i8, i8* %3, i32 %20
- %22 = load i8, i8* %21, align 1
- %23 = zext i8 %22 to i32
- %24 = or i32 %19, %23
- ret i32 %24
-}