namespace {
struct PointerOffsetPair {
Value *Pointer;
- uint64_t Offset;
+ int64_t Offset; // Signed: constant GEP offsets are accumulated here via getSExtValue() and may be negative.
};
struct LoadPOPPair {
unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
APInt Offset(BitWidth, 0);
if (GEP->accumulateConstantOffset(DL, Offset))
- POP.Offset += Offset.getZExtValue();
+ POP.Offset += Offset.getSExtValue();
else
// Can't handle GEPs with variable indices.
return POP;
LoadInst *BaseLoad = nullptr;
SmallVector<LoadPOPPair, 8> AggregateLoads;
bool Combined = false;
- uint64_t PrevOffset = -1ull;
+ bool ValidPrevOffset = false;
+ int64_t PrevOffset = 0;
uint64_t PrevSize = 0;
for (auto &L : Loads) {
- if (PrevOffset == -1ull) {
+ if (ValidPrevOffset == false) {
BaseLoad = L.Load;
PrevOffset = L.POP.Offset;
PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
L.Load->getType());
AggregateLoads.push_back(L);
+ ValidPrevOffset = true;
continue;
}
if (L.Load->getAlignment() > BaseLoad->getAlignment())
continue;
- if (L.POP.Offset > PrevOffset + PrevSize) {
+ int64_t PrevEnd = PrevOffset + PrevSize;
+ if (L.POP.Offset > PrevEnd) {
// No other load will be combinable
if (combineLoads(AggregateLoads))
Combined = true;
AggregateLoads.clear();
- PrevOffset = -1;
+ ValidPrevOffset = false;
continue;
}
- if (L.POP.Offset != PrevOffset + PrevSize)
+ if (L.POP.Offset != PrevEnd)
// This load is offset less than the size of the last load.
// FIXME: We may want to handle this case.
continue;
--- /dev/null
+; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @Load_NegGep(i32* %i){
+ %1 = getelementptr inbounds i32, i32* %i, i64 -1 ; negative constant index: %1 is the i32 slot immediately before %i
+ %2 = load i32, i32* %1, align 4 ; load from the lower address (%i - 1 element)
+ %3 = load i32, i32* %i, align 4 ; load from %i itself; expected to be merged with the load above into one i64 load
+ %4 = add nsw i32 %3, %2
+ ret i32 %4
+; CHECK-LABEL: @Load_NegGep(
+; CHECK: %[[load:.*]] = load i64
+; CHECK: %[[combine_extract_lo:.*]] = trunc i64 %[[load]] to i32
+; CHECK: %[[combine_extract_shift:.*]] = lshr i64 %[[load]], 32
+; CHECK: %[[combine_extract_hi:.*]] = trunc i64 %[[combine_extract_shift]] to i32
+; CHECK: %[[add:.*]] = add nsw i32 %[[combine_extract_hi]], %[[combine_extract_lo]]
+}
+
+