if ((TySize % 8) != 0)
continue;
+ // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
+ // functions currently use an integer type for the vectorized load/store
+ // and do not support casting between the integer type and a vector of
+ // pointers (e.g. i64 to <2 x i16*>).
+ if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
+ continue;
+
Value *Ptr = LI->getPointerOperand();
unsigned AS = Ptr->getType()->getPointerAddressSpace();
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
if (!VectorType::isValidElementType(Ty->getScalarType()))
continue;
+ // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
+ // functions currently use an integer type for the vectorized load/store
+ // and do not support casting between the integer type and a vector of
+ // pointers (e.g. i64 to <2 x i16*>).
+ if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
+ continue;
+
// Skip weird non-byte sizes. They probably aren't worth the effort of
// handling correctly.
unsigned TySize = DL.getTypeSizeInBits(Ty);
SmallVector<int, 16> Heads, Tails;
int ConsecutiveChain[64];
- // Do a quadratic search on all of the given stores and find all of the pairs
- // of stores that follow each other.
+ // Do a quadratic search on all of the given loads/stores and find all of the
+ // pairs of loads/stores that follow each other.
for (int i = 0, e = Instrs.size(); i < e; ++i) {
ConsecutiveChain[i] = -1;
for (int j = e - 1; j >= 0; --j) {
SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
StoreInst *S0 = cast<StoreInst>(Chain[0]);
- // If the vector has an int element, default to int for the whole load.
+ // If the vector has an int element, default to int for the whole store.
Type *StoreTy;
for (Instruction *I : Chain) {
StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
ret void
}
+; Verify that we no longer hit an assert for this test case. No change expected.
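+; The vectorizer cannot cast a vector of pointers to/from its integer-typed
+; wide form, so both loads and both stores should be left untouched.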
+; CHECK-LABEL: @copy_vec_of_ptrs
+; CHECK: %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
+; CHECK: %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
+; CHECK: %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
+; CHECK: %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
+; CHECK: store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
+; CHECK: store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
+define amdgpu_kernel void @copy_vec_of_ptrs(<2 x i16*> addrspace(1)* %out,
+                                            <2 x i16*> addrspace(1)* %in) #0 {
+ %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
+ %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
+ %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
+
+ %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
+ store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
+ store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
+ ret void
+}
+
declare void @llvm.amdgcn.s.barrier() #1
attributes #0 = { nounwind }