[ARM] Make f16 interleaved accesses expensive.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>

Sat, 11 Feb 2017 01:53:04 +0000 (01:53 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>

Sat, 11 Feb 2017 01:53:04 +0000 (01:53 +0000)
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Sat, 11 Feb 2017 01:53:04 +0000 (01:53 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Sat, 11 Feb 2017 01:53:04 +0000 (01:53 +0000)
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp

index 2b6b36bc3e6831b550c56f441e10da51a07391f6..90e4f3a7cc6f00ae6bd1cd21d3c093df496fcfba 100644 (file)
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -533,7 +533,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
      unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
  
      // vldN/vstN only support legal vector types of size 64 or 128 in bits.
-    if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
+    if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128) &&
+        !VecTy->getScalarType()->isHalfTy())
        return Factor;
    }
  
diff --git a/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll b/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll

index 012a5fc16867a42eb7d27fb5b4d19bda6f0b7eb6..685c523d495a8e17b08b6d8823c004c20ea26dfa 100644 (file)
--- a/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
+++ b/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -99,3 +99,34 @@ for.body:
  for.end:
    ret void
  }
+
+%half.2 = type {half, half}
+define void @half_factor_2(%half.2* %data, i64 %n) {
+entry:
+  br label %for.body
+
+; VF_4-LABEL: Checking a loop in "half_factor_2"
+; VF_4:         Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
+; VF_4-NEXT:    Found an estimated cost of 40 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_8-LABEL: Checking a loop in "half_factor_2"
+; VF_8:         Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
+; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
+; VF_8-NEXT:    Found an estimated cost of 80 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %tmp0 = getelementptr inbounds %half.2, %half.2* %data, i64 %i, i32 0
+  %tmp1 = getelementptr inbounds %half.2, %half.2* %data, i64 %i, i32 1
+  %tmp2 = load half, half* %tmp0, align 2
+  %tmp3 = load half, half* %tmp1, align 2
+  store half 0., half* %tmp0, align 2
+  store half 0., half* %tmp1, align 2
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Sat, 11 Feb 2017 01:53:04 +0000 (01:53 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Sat, 11 Feb 2017 01:53:04 +0000 (01:53 +0000)
lib/Target/ARM/ARMTargetTransformInfo.cpp		patch | blob | history
test/Transforms/LoopVectorize/ARM/interleaved_cost.ll		patch | blob | history