if ((TySize % 8) != 0)
continue;
+ // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
+ // functions currently use an integer type for the vectorized load/store
+ // and do not support casting between the integer type and a vector of
+ // pointers (e.g. i64 to <2 x i16*>).
+ if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
+ continue;
+
Value *Ptr = LI->getPointerOperand();
unsigned AS = Ptr->getType()->getPointerAddressSpace();
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
if (!VectorType::isValidElementType(Ty->getScalarType()))
continue;
+ // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
+ // functions currently use an integer type for the vectorized load/store
+ // and do not support casting between the integer type and a vector of
+ // pointers (e.g. i64 to <2 x i16*>).
+ if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
+ continue;
+
// Skip weird non-byte sizes. They probably aren't worth the effort of
// handling correctly.
unsigned TySize = DL.getTypeSizeInBits(Ty);
SmallVector<int, 16> Heads, Tails;
int ConsecutiveChain[64];
- // Do a quadratic search on all of the given stores and find all of the pairs
- // of stores that follow each other.
+ // Do a quadratic search on all of the given loads/stores and find all of the
+ // pairs of loads/stores that follow each other.
for (int i = 0, e = Instrs.size(); i < e; ++i) {
ConsecutiveChain[i] = -1;
for (int j = e - 1; j >= 0; --j) {
SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
StoreInst *S0 = cast<StoreInst>(Chain[0]);
- // If the vector has an int element, default to int for the whole load.
+ // If the vector has an int element, default to int for the whole store.
Type *StoreTy;
for (Instruction *I : Chain) {
StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
ret void
}
+; Verify that we no longer hit an assert for this test case. No change expected.
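+; The vectorizer cannot cast a vector of pointers to/from its integer-typed
+; wide form, so both loads and both stores should be left untouched.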
+; CHECK-LABEL: @copy_vec_of_ptrs
+; CHECK: %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
+; CHECK: %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
+; CHECK: %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
+; CHECK: %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
+; CHECK: store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
+; CHECK: store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
+define amdgpu_kernel void @copy_vec_of_ptrs(<2 x i16*> addrspace(1)* %out,
+                                            <2 x i16*> addrspace(1)* %in) #0 {
+ %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
+ %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
+ %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
+
+ %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
+ store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
+ store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
+ ret void
+}
+
declare void @llvm.amdgcn.s.barrier() #1
attributes #0 = { nounwind }