enqueueUsers(BC);
}
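+ // An addrspacecast leaves the underlying object unchanged, so walk its
+ // users exactly as for a bitcast.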
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ enqueueUsers(ASC);
+ }
+
void visitPtrToIntInst(PtrToIntInst &I) {
PI.setEscaped(&I);
}
if (!IsOffsetKnown)
return false;
- return GEPI.accumulateConstantOffset(DL, Offset);
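+ // accumulateConstantOffset requires an APInt as wide as the GEP's index
+ // type, which may differ from Offset's width when other address spaces
+ // are involved; accumulate separately, then sign-extend or truncate.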
+ APInt TmpOffset(DL.getIndexTypeSizeInBits(GEPI.getType()), 0);
+ if (GEPI.accumulateConstantOffset(DL, TmpOffset)) {
+ Offset += TmpOffset.sextOrTrunc(Offset.getBitWidth());
+ return true;
+ }
+
+ return false;
}
return Base::visitBitCastInst(BC);
}
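+ // Prune dead addrspacecasts just like dead bitcasts; live ones defer to
+ // the base visitor, which only walks their users.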
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ if (ASC.use_empty())
+ return markAsDead(ASC);
+
+ return Base::visitAddrSpaceCastInst(ASC);
+ }
+
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- const DataLayout &DL = LI.getModule()->getDataLayout();
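+ // Replacing a volatile load with one in a different address space could
+ // change its behavior, so don't promote across the cast in that case.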
+ if (LI.isVolatile() &&
+ LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&LI);
+
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
if (!IsOffsetKnown)
return PI.setAborted(&SI);
- const DataLayout &DL = SI.getModule()->getDataLayout();
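+ // As with loads, a volatile store whose pointer has been cast to a
+ // different address space blocks promotion.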
+ if (SI.isVolatile() &&
+ SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&SI);
+
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // Don't replace this memset with a store through a pointer in a different
+ // address space. TODO: Use a store through the casted new alloca?
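+ // An illustrative (hypothetical) case that must be rejected:
+ //   %p = addrspacecast i8* %a to i8 addrspace(1)*
+ //   call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %p, i8 0, i32 8, i1 true)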
+ if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&II);
+
insertUse(II, Offset, Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
(bool)Length);
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // Don't replace this with a load/store through a pointer in a different
+ // address space. TODO: Use accesses through the casted new alloca?
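+ // Unlike memset, either end of the transfer may be the alloca, so both
+ // the source and the destination address space have to match it.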
+ if (II.isVolatile() &&
+ (II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
+ II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
+ return PI.setAborted(&II);
+
// This side of the transfer is completely out-of-bounds, and so we can
// nuke the entire transfer. However, we also need to nuke the other side
// if already added to our partitions.
if (!GEP->hasAllZeroIndices())
return GEP;
} else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
- !isa<SelectInst>(I)) {
+ !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
return I;
}
Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
- Type *TargetTy = PointerTy->getPointerElementType();
+ PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
+ Type *TargetTy = TargetPtrTy->getElementType();
do {
// First fold any existing GEPs into the offset.
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
- if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
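+ // The desired pointer type may differ in address space as well, in which
+ // case CreatePointerBitCastOrAddrSpaceCast emits an addrspacecast instead.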
+ if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
+ Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
+ TargetPtrTy,
+ NamePrefix + "sroa_cast");
+ }
return Ptr;
}
continue;
}
- assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+ assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
+ isa<PHINode>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I));
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(cast<Instruction>(U));
return false;
}
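+ // The cast itself never needs rewriting; just enqueue its users so any
+ // rewritable loads and stores reached through it are still found.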
+ bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ enqueueUsers(ASC);
+ return false;
+ }
+
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
enqueueUsers(GEPI);
return false;
define i64 @alloca_addrspacecast_bitcast(i64 %X) {
; CHECK-LABEL: @alloca_addrspacecast_bitcast(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8]
-; CHECK-NEXT: [[A_CAST:%.*]] = addrspacecast [8 x i8]* [[A]] to [8 x i8] addrspace(1)*
-; CHECK-NEXT: [[B:%.*]] = bitcast [8 x i8] addrspace(1)* [[A_CAST]] to i64 addrspace(1)*
-; CHECK-NEXT: store i64 [[X:%.*]], i64 addrspace(1)* [[B]]
-; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[B]]
-; CHECK-NEXT: ret i64 [[Z]]
+; CHECK-NEXT: ret i64 [[X:%.*]]
;
entry:
%A = alloca [8 x i8]
define i64 @alloca_bitcast_addrspacecast(i64 %X) {
; CHECK-LABEL: @alloca_bitcast_addrspacecast(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8]
-; CHECK-NEXT: [[A_CAST:%.*]] = bitcast [8 x i8]* [[A]] to i64*
-; CHECK-NEXT: [[B:%.*]] = addrspacecast i64* [[A_CAST]] to i64 addrspace(1)*
-; CHECK-NEXT: store i64 [[X:%.*]], i64 addrspace(1)* [[B]]
-; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[B]]
-; CHECK-NEXT: ret i64 [[Z]]
+; CHECK-NEXT: ret i64 [[X:%.*]]
;
entry:
%A = alloca [8 x i8]
define i64 @alloca_addrspacecast_gep(i64 %X) {
; CHECK-LABEL: @alloca_addrspacecast_gep(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A_AS0:%.*]] = alloca [256 x i8], align 4
-; CHECK-NEXT: [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32
-; CHECK-NEXT: [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64*
-; CHECK-NEXT: store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4
-; CHECK-NEXT: [[A_AS1:%.*]] = addrspacecast [256 x i8]* [[A_AS0]] to [256 x i8] addrspace(1)*
-; CHECK-NEXT: [[GEPA_AS1:%.*]] = getelementptr [256 x i8], [256 x i8] addrspace(1)* [[A_AS1]], i16 0, i16 32
-; CHECK-NEXT: [[GEPA_AS1_BC:%.*]] = bitcast i8 addrspace(1)* [[GEPA_AS1]] to i64 addrspace(1)*
-; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPA_AS1_BC]], align 4
-; CHECK-NEXT: ret i64 [[Z]]
+; CHECK-NEXT: ret i64 [[X:%.*]]
;
entry:
%A.as0 = alloca [256 x i8], align 4
define i64 @alloca_gep_addrspacecast(i64 %X) {
; CHECK-LABEL: @alloca_gep_addrspacecast(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A_AS0:%.*]] = alloca [256 x i8], align 4
-; CHECK-NEXT: [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32
-; CHECK-NEXT: [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64*
-; CHECK-NEXT: store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4
-; CHECK-NEXT: [[GEPA_AS1_BC:%.*]] = addrspacecast i64* [[GEPA_AS0_BC]] to i64 addrspace(1)*
-; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPA_AS1_BC]], align 4
-; CHECK-NEXT: ret i64 [[Z]]
+; CHECK-NEXT: ret i64 [[X:%.*]]
;
entry:
%A.as0 = alloca [256 x i8], align 4
define i64 @alloca_gep_addrspacecast_gep(i64 %X) {
; CHECK-LABEL: @alloca_gep_addrspacecast_gep(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A_AS0:%.*]] = alloca [256 x i8], align 4
-; CHECK-NEXT: [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32
-; CHECK-NEXT: [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64*
-; CHECK-NEXT: store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4
-; CHECK-NEXT: [[GEPB_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 16
-; CHECK-NEXT: [[GEPB_AS1:%.*]] = addrspacecast i8* [[GEPB_AS0]] to i8 addrspace(1)*
-; CHECK-NEXT: [[GEPC_AS1:%.*]] = getelementptr i8, i8 addrspace(1)* [[GEPB_AS1]], i16 16
-; CHECK-NEXT: [[GEPC_AS1_BC:%.*]] = bitcast i8 addrspace(1)* [[GEPC_AS1]] to i64 addrspace(1)*
-; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPC_AS1_BC]], align 4
-; CHECK-NEXT: ret i64 [[Z]]
+; CHECK-NEXT: ret i64 [[X:%.*]]
;
entry:
%A.as0 = alloca [256 x i8], align 4
define void @select_addrspacecast(i1 %a, i1 %b) {
; CHECK-LABEL: @select_addrspacecast(
-; CHECK-NEXT: [[C:%.*]] = alloca i64, align 8
-; CHECK-NEXT: [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]]
-; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)*
-; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* [[ASC]]
-; CHECK-NEXT: [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8
; CHECK-NEXT: ret void
;
%c = alloca i64, align 8
define void @select_addrspacecast_const_op(i1 %a, i1 %b) {
; CHECK-LABEL: @select_addrspacecast_const_op(
; CHECK-NEXT: [[C:%.*]] = alloca i64, align 8
-; CHECK-NEXT: [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]]
-; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)*
-; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* null
+; CHECK-NEXT: [[C_0_ASC_SROA_CAST:%.*]] = addrspacecast i64* [[C]] to i64 addrspace(1)*
+; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[C_0_ASC_SROA_CAST]], i64 addrspace(1)* null
; CHECK-NEXT: [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8
; CHECK-NEXT: ret void
;
define void @select_addrspacecast_gv(i1 %a, i1 %b) {
; CHECK-LABEL: @select_addrspacecast_gv(
-; CHECK-NEXT: [[C:%.*]] = alloca i64, align 8
-; CHECK-NEXT: [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]]
-; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)*
-; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* @gv
-; CHECK-NEXT: [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8
+; CHECK-NEXT: [[COND_SROA_SPECULATE_LOAD_FALSE:%.*]] = load i64, i64 addrspace(1)* @gv, align 8
+; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 undef, i64 undef, i64 [[COND_SROA_SPECULATE_LOAD_FALSE]]
; CHECK-NEXT: ret void
;
%c = alloca i64, align 8
ret i64 %Z
}
+define i64 @test2_addrspacecast(i64 %X) {
+; CHECK-LABEL: @test2_addrspacecast(
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
+
+entry:
+ %A = alloca [8 x i8]
+ %B = addrspacecast [8 x i8]* %A to i64 addrspace(1)*
+ store i64 %X, i64 addrspace(1)* %B
+ br label %L2
+
+L2:
+ %Z = load i64, i64 addrspace(1)* %B
+ ret i64 %Z
+}
+
+define i64 @test2_addrspacecast_gep(i64 %X, i16 %idx) {
+; CHECK-LABEL: @test2_addrspacecast_gep(
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
+
+entry:
+ %A = alloca [256 x i8]
+ %B = addrspacecast [256 x i8]* %A to i64 addrspace(1)*
+ %gepA = getelementptr [256 x i8], [256 x i8]* %A, i16 0, i16 32
+ %gepB = getelementptr i64, i64 addrspace(1)* %B, i16 4
+ store i64 %X, i64 addrspace(1)* %gepB, align 1
+ br label %L2
+
+L2:
+ %gepA.bc = bitcast i8* %gepA to i64*
+ %Z = load i64, i64* %gepA.bc, align 1
+ ret i64 %Z
+}
+
+; Avoid crashing when loading and storing at different offsets.
+define i64 @test2_addrspacecast_gep_offset(i64 %X) {
+; CHECK-LABEL: @test2_addrspacecast_gep_offset(
+; CHECK: %A.sroa.0 = alloca [10 x i8]
+; CHECK: [[GEP0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %A.sroa.0, i16 0, i16 2
+; CHECK-NEXT: [[GEP1:%.*]] = addrspacecast i8* [[GEP0]] to i64 addrspace(1)*
+; CHECK-NEXT: store i64 %X, i64 addrspace(1)* [[GEP1]], align 1
+; CHECK: br
+
+; CHECK: [[BITCAST:%.*]] = bitcast [10 x i8]* %A.sroa.0 to i64*
+; CHECK: %A.sroa.0.0.A.sroa.0.30.Z = load i64, i64* [[BITCAST]], align 1
+; CHECK-NEXT: ret
+entry:
+ %A = alloca [256 x i8]
+ %B = addrspacecast [256 x i8]* %A to i64 addrspace(1)*
+ %gepA = getelementptr [256 x i8], [256 x i8]* %A, i16 0, i16 30
+ %gepB = getelementptr i64, i64 addrspace(1)* %B, i16 4
+ store i64 %X, i64 addrspace(1)* %gepB, align 1
+ br label %L2
+
+L2:
+ %gepA.bc = bitcast i8* %gepA to i64*
+ %Z = load i64, i64* %gepA.bc, align 1
+ ret i64 %Z
+}
+
define void @test3(i8* %dst, i8* align 8 %src) {
; CHECK-LABEL: @test3(
ret i16 %val
}
+define i16 @test5_multi_addrspace_access() {
+; CHECK-LABEL: @test5_multi_addrspace_access(
+; CHECK-NOT: alloca float
+; CHECK: %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
+; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
+; CHECK-NEXT: ret i16 %[[trunc]]
+
+entry:
+ %a = alloca [4 x i8]
+ %fptr = bitcast [4 x i8]* %a to float*
+ %fptr.as1 = addrspacecast float* %fptr to float addrspace(1)*
+ store float 0.0, float addrspace(1)* %fptr.as1
+ %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 2
+ %iptr = bitcast i8* %ptr to i16*
+ %val = load i16, i16* %iptr
+ ret i16 %val
+}
+
define i32 @test6() {
; CHECK-LABEL: @test6(
; CHECK: alloca i32
ret i32 undef
}
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) nounwind
+
+define i32 @test19_addrspacecast(%opaque* %x) {
+; This input makes us try to compute a natural GEP through the opaque type
+; when rewriting pointers. A check for an unsized type was previously
+; missing, which caused a crash. Ensure it behaves reasonably now.
+; CHECK-LABEL: @test19_addrspacecast(
+; CHECK-NOT: alloca
+; CHECK: ret i32 undef
+
+entry:
+ %a = alloca { i64, i8* }
+ %cast1 = addrspacecast %opaque* %x to i8 addrspace(1)*
+ %cast2 = bitcast { i64, i8* }* %a to i8*
+ call void @llvm.memcpy.p0i8.p1i8.i32(i8* %cast2, i8 addrspace(1)* %cast1, i32 16, i1 false)
+ %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0
+ %val = load i64, i64* %gep
+ ret i32 undef
+}
+
define i32 @test20() {
; Ensure we can track negative offsets (before the beginning of the alloca) and
; negative relative offsets from offsets starting past the end of the alloca.
define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) {
; Make sure the right address space pointer is used for the type check.
; CHECK-LABEL: @PR14105_as1(
+; CHECK: alloca { [16 x i8] }, align 8
+; CHECK-NEXT: %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1
+; CHECK-NEXT: %cast1 = bitcast { [16 x i8] } addrspace(1)* %gep to i8 addrspace(1)*
+; CHECK-NEXT: %cast2 = bitcast { [16 x i8] }* %a to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true)
entry:
%a = alloca { [16 x i8] }, align 8
-; CHECK: alloca [16 x i8], align 8
-
%gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1
-; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i16 0
-
%cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
%cast2 = bitcast { [16 x i8 ] }* %a to i8*
call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true)
ret i32 %result
}
+; If the bitcast isn't considered a safe phi/select use, the alloca
+; is not promoted and remains an array.
+; FIXME: Why isn't this identical to test2?
+
+; CHECK-LABEL: @test2_bitcast(
+; CHECK: alloca i32
+; CHECK-NEXT: alloca i32
+
+; CHECK: %select = select i1 %cond, i32* %a.sroa.3, i32* %a.sroa.0
+; CHECK-NEXT: %select.bc = bitcast i32* %select to float*
+; CHECK-NEXT: %result = load float, float* %select.bc, align 4
+define float @test2_bitcast() {
+entry:
+ %a = alloca [2 x i32]
+ %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+ %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+ store i32 0, i32* %a0
+ store i32 1, i32* %a1
+ %v0 = load i32, i32* %a0
+ %v1 = load i32, i32* %a1
+ %cond = icmp sle i32 %v0, %v1
+ %select = select i1 %cond, i32* %a1, i32* %a0
+ %select.bc = bitcast i32* %select to float*
+ %result = load float, float* %select.bc
+ ret float %result
+}
+
+; CHECK-LABEL: @test2_addrspacecast(
+; CHECK: alloca i32
+; CHECK-NEXT: alloca i32
+
+; CHECK: %select = select i1 %cond, i32* %a.sroa.3, i32* %a.sroa.0
+; CHECK-NEXT: %select.asc = addrspacecast i32* %select to i32 addrspace(1)*
+; CHECK-NEXT: load i32, i32 addrspace(1)* %select.asc, align 4
+define i32 @test2_addrspacecast() {
+entry:
+ %a = alloca [2 x i32]
+ %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+ %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+ store i32 0, i32* %a0
+ store i32 1, i32* %a1
+ %v0 = load i32, i32* %a0
+ %v1 = load i32, i32* %a1
+ %cond = icmp sle i32 %v0, %v1
+ %select = select i1 %cond, i32* %a1, i32* %a0
+ %select.asc = addrspacecast i32* %select to i32 addrspace(1)*
+ %result = load i32, i32 addrspace(1)* %select.asc
+ ret i32 %result
+}
+
define i32 @test3(i32 %x) {
; CHECK-LABEL: @test3(
entry: