Add TLI.allowsMisalignedMemoryAccesses to LoadStoreVectorizer
author    Alina Sbirlea <asbirlea@google.com>  Mon, 11 Jul 2016 20:46:17 +0000 (20:46 +0000)
committer Alina Sbirlea <asbirlea@google.com>  Mon, 11 Jul 2016 20:46:17 +0000 (20:46 +0000)
Summary: Extend TTI to expose TLI.allowsMisalignedMemoryAccesses(), and check this condition when vectorizing load and store chains.
The hook takes additional parameters: AddressSpace, Alignment, and Fast.
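
For context, a minimal sketch of how a pass holding a TargetTransformInfo reference (here TTI, as in the vectorizer below) queries the new hook; the concrete values are illustrative only:

  // Ask whether a 128-bit access in address space 0 with only 4-byte
  // alignment is legal, and whether the target considers it fast.
  bool Fast = false;
  bool Allows = TTI.allowsMisalignedMemoryAccesses(/*BitWidth=*/128,
                                                   /*AddressSpace=*/0,
                                                   /*Alignment=*/4, &Fast);
  if (Allows && Fast) {
    // The wide access is both legal and cheap despite being underaligned.
  }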

Reviewers: llvm-commits, jlebar

Subscribers: arsenm, mzolotukhin

Differential Revision: http://reviews.llvm.org/D21935

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275100 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Analysis/TargetTransformInfo.h
include/llvm/Analysis/TargetTransformInfoImpl.h
include/llvm/CodeGen/BasicTTIImpl.h
lib/Analysis/TargetTransformInfo.cpp
lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll

diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 50c8e8aaec293a6dadbf80f387ad175d1ec2df9a..7d11d4df638c8eaad246957469fcd95db2b79f39 100644
@@ -388,6 +388,11 @@ public:
   /// operations, shuffles, or casts.
   bool isFPVectorizationPotentiallyUnsafe() const;
 
+  /// \brief Determine if the target supports unaligned memory accesses.
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+                                      unsigned Alignment = 1,
+                                      bool *Fast = nullptr) const;
+
   /// \brief Return hardware support for population count.
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 
@@ -653,6 +658,10 @@ public:
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
+  virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                              unsigned AddressSpace,
+                                              unsigned Alignment,
+                                              bool *Fast) = 0;
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
   virtual int getFPOpCost(Type *Ty) = 0;
@@ -820,6 +829,11 @@ public:
   bool isFPVectorizationPotentiallyUnsafe() override {
     return Impl.isFPVectorizationPotentiallyUnsafe();
   }
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+                                      unsigned Alignment, bool *Fast) override {
+    return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+                                               Alignment, Fast);
+  }
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
     return Impl.getPopcntSupport(IntTyWidthInBit);
   }
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index b9baee1010471d29ff7c8cc34df3491f2c42e09a..52e7de6b222b513b9ba30e1493bbe3badcf54eed 100644
@@ -244,6 +244,11 @@ public:
 
   bool isFPVectorizationPotentiallyUnsafe() { return false; }
 
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                      unsigned AddressSpace,
+                                      unsigned Alignment,
+                                      bool *Fast) { return false; }
+
   TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
     return TTI::PSK_Software;
   }
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index e4b0aa84323c12df1fc29cacab076c475a25e59e..69951afb623c3777aef9ade42fb4091260cac606 100644
@@ -105,6 +105,11 @@ public:
 
   /// \name Scalar TTI Implementations
   /// @{
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+                                      unsigned Alignment, bool *Fast) const {
+    MVT M = MVT::getIntegerVT(BitWidth);
+    return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);
+  }
 
   bool hasBranchDivergence() { return false; }
 
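BasicTTIImpl bridges the TTI query to the target's TargetLowering by wrapping the bit width in an integer MVT (e.g. BitWidth = 32 becomes MVT::i32). On the TLI side, a target opts in by overriding allowsMisalignedMemoryAccesses; the override below is a hypothetical sketch (the target class, feature query, and width threshold are invented for illustration, not taken from this commit):

  // Hypothetical target: misaligned accesses of 32 bits or wider are
  // legal everywhere, and fast when the subtarget supports them.
  bool MyTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *Fast) const {
    if (!VT.isSimple() || VT.getSizeInBits() < 32)
      return false;
    if (Fast)
      *Fast = Subtarget->hasFastUnalignedAccess(); // invented feature query
    return true;
  }
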
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 51440f5b2df1cbead0f15d0c0c61cdd2d2f38d8d..55b50ae42bc0b22d107e8d4abbff613ae4245f45 100644
@@ -186,6 +186,14 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
   return TTIImpl->isFPVectorizationPotentiallyUnsafe();
 }
 
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                                         unsigned AddressSpace,
+                                                         unsigned Alignment,
+                                                         bool *Fast) const {
+  return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+                                                 Alignment, Fast);
+}
+
 TargetTransformInfo::PopcntSupportKind
 TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
   return TTIImpl->getPopcntSupport(IntTyWidthInBit);
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 8166361636f02286006fd7c1bc1c476bf1cde210..9c581a4603b523d11404c5d48256e60d50428eb0 100644
@@ -127,6 +127,10 @@ private:
 
   /// Vectorizes the store instructions in Chain.
   bool vectorizeStoreChain(ArrayRef<Value *> Chain);
+
+  /// Check whether this load/store access is misaligned.
+  bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+                          unsigned Alignment);
 };
 
 class LoadStoreVectorizer : public FunctionPass {
@@ -692,18 +696,16 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
   unsigned Alignment = getAlignment(S0);
 
   // If the store is going to be misaligned, don't vectorize it.
-  // TODO: Check TLI.allowsMisalignedMemoryAccess
-  if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
-    if (S0->getPointerAddressSpace() == 0) {
-      // If we're storing to an object on the stack, we control its alignment,
-      // so we can cheat and change it!
-      Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
-      if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
-        AI->setAlignment(TargetBaseAlign);
-        Alignment = TargetBaseAlign;
-      } else {
-        return false;
-      }
+  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
+    if (S0->getPointerAddressSpace() != 0)
+      return false;
+
+    // If we're storing to an object on the stack, we control its alignment,
+    // so we can cheat and change it!
+    Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
+    if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
+      AI->setAlignment(TargetBaseAlign);
+      Alignment = TargetBaseAlign;
     } else {
       return false;
     }
@@ -821,18 +823,16 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
   unsigned Alignment = getAlignment(L0);
 
   // If the load is going to be misaligned, don't vectorize it.
-  // TODO: Check TLI.allowsMisalignedMemoryAccess and remove TargetBaseAlign.
-  if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
-    if (L0->getPointerAddressSpace() == 0) {
-      // If we're loading from an object on the stack, we control its alignment,
-      // so we can cheat and change it!
-      Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
-      if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
-        AI->setAlignment(TargetBaseAlign);
-        Alignment = TargetBaseAlign;
-      } else {
-        return false;
-      }
+  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
+    if (L0->getPointerAddressSpace() != 0)
+      return false;
+
+    // If we're loading from an object on the stack, we control its alignment,
+    // so we can cheat and change it!
+    Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
+    if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
+      AI->setAlignment(TargetBaseAlign);
+      Alignment = TargetBaseAlign;
     } else {
       return false;
     }
@@ -915,3 +915,13 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
   NumScalarsVectorized += Chain.size();
   return true;
 }
+
+bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+                                    unsigned Alignment) {
+  bool Fast = false;
+  bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddressSpace,
+                                                   Alignment, &Fast);
+  // TODO: Remove TargetBaseAlign
+  return !(Allows && Fast) && (Alignment % SzInBytes) != 0 &&
+         (Alignment % TargetBaseAlign) != 0;
+}
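
To unpack the predicate: the chain counts as misaligned only when the target does not report the access as both allowed and fast, and the alignment divides neither the access size nor TargetBaseAlign (a constant in this pass, 4 at the time of this commit). A worked example in comment form, with illustrative values:

  // 4-byte chain (SzInBytes = 4), Alignment = 1, TargetBaseAlign = 4.
  //
  // Target reports Allows = true, Fast = true:
  //   !(Allows && Fast)                  -> false
  //   => accessIsMisaligned() is false; the chain vectorizes as-is.
  //
  // Target reports Fast = false:
  //   !(Allows && Fast)                  -> true
  //   (Alignment % SzInBytes) != 0       -> (1 % 4) != 0 -> true
  //   (Alignment % TargetBaseAlign) != 0 -> (1 % 4) != 0 -> true
  //   => accessIsMisaligned() is true; the callers above either realign a
  //      stack object or give up on the chain.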
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
index 961ab1d1fa36187a643c917c260588e560927755..03265efe2843d41e4d758128d3ab7c042f2cce15 100644
@@ -19,8 +19,7 @@ define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
 }
 
 ; CHECK-LABEL: @merge_global_store_2_constants_i8_natural_align
-; CHECK: store i8
-; CHECK: store i8
+; CHECK: store <2 x i8>
 define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
 
@@ -50,8 +49,7 @@ define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
 }
 
 ; CHECK-LABEL: @merge_global_store_2_constants_i16_natural_align
-; CHECK: store i16
-; CHECK: store i16
+; CHECK: store <2 x i16>
 define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
 
@@ -61,8 +59,7 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)*
 }
 
 ; CHECK-LABEL: @merge_global_store_2_constants_half_natural_align
-; CHECK: store half
-; CHECK: store half
+; CHECK: store <2 x half>
 define void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
 
@@ -432,14 +429,8 @@ define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 ad
 }
 
 ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i8_natural_align
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: store i8
-; CHECK: store i8
-; CHECK: store i8
-; CHECK: store i8
+; CHECK: load <4 x i8>
+; CHECK: store <4 x i8>
 define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
   %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
   %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
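
The flipped CHECK lines above reflect that, with the new hook in place, the target reports these naturally aligned scalar accesses as acceptable for the wider vector access, so the chains now vectorize where they previously stayed scalar. For reference, such tests are exercised by piping the .ll file through opt with the pass enabled and checking the output with FileCheck; a sketch of the kind of RUN line such a file carries (the exact triple and flags in the real test may differ):

  ; RUN: opt -mtriple=amdgcn-- -load-store-vectorizer -S %s | FileCheck %s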