From: Jin-Gu Kang <jaykang10@gmail.com>
Date: Tue, 4 Apr 2017 16:40:25 +0000 (+0000)
Subject: Preserve vec3 type.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a463701ed2c3612812c34c1057a6dbdf1fdb8224;p=clang

Preserve vec3 type.

Summary: Preserve vec3 type with CodeGen option.

Reviewers: Anastasia, bruno

Reviewed By: Anastasia

Subscribers: bruno, ahatanak, cfe-commits

Differential Revision: https://reviews.llvm.org/D30810

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@299445 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td
index a683efecee..d35844c1f1 100644
--- a/include/clang/Driver/CC1Options.td
+++ b/include/clang/Driver/CC1Options.td
@@ -658,6 +658,8 @@ def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">,
   HelpText<"Set default MS calling convention">;
 def finclude_default_header : Flag<["-"], "finclude-default-header">,
   HelpText<"Include the default header file for OpenCL">;
+def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
+  HelpText<"Preserve 3-component vector type">;
 
 // FIXME: Remove these entirely once functionality/tests have been excised.
 def fobjc_gc_only : Flag<["-"], "fobjc-gc-only">, Group<f_Group>,
diff --git a/include/clang/Frontend/CodeGenOptions.def b/include/clang/Frontend/CodeGenOptions.def
index 751f5e2944..9565b22e70 100644
--- a/include/clang/Frontend/CodeGenOptions.def
+++ b/include/clang/Frontend/CodeGenOptions.def
@@ -263,6 +263,9 @@ CODEGENOPT(StrictReturn, 1, 1)
 /// Whether emit extra debug info for sample pgo profile collection.
 CODEGENOPT(DebugInfoForProfiling, 1, 0)
 
+/// Whether 3-component vector type is preserved.
+CODEGENOPT(PreserveVec3Type, 1, 0)
+
 #undef CODEGENOPT
 #undef ENUM_CODEGENOPT
 #undef VALUE_CODEGENOPT
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 1a95eb1045..38eb345d91 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -1369,26 +1369,28 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
                                                QualType TBAABaseType,
                                                uint64_t TBAAOffset,
                                                bool isNontemporal) {
-  // For better performance, handle vector loads differently.
-  if (Ty->isVectorType()) {
-    const llvm::Type *EltTy = Addr.getElementType();
-
-    const auto *VTy = cast<llvm::VectorType>(EltTy);
-
-    // Handle vectors of size 3 like size 4 for better performance.
-    if (VTy->getNumElements() == 3) {
-
-      // Bitcast to vec4 type.
-      llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
-                                                         4);
-      Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
-      // Now load value.
-      llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
-
-      // Shuffle vector to get vec3.
-      V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
-                                      {0, 1, 2}, "extractVec");
-      return EmitFromMemory(V, Ty);
+  if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+    // For better performance, handle vector loads differently.
+    if (Ty->isVectorType()) {
+      const llvm::Type *EltTy = Addr.getElementType();
+
+      const auto *VTy = cast<llvm::VectorType>(EltTy);
+
+      // Handle vectors of size 3 like size 4 for better performance.
+      if (VTy->getNumElements() == 3) {
+
+        // Bitcast to vec4 type.
+        llvm::VectorType *vec4Ty =
+            llvm::VectorType::get(VTy->getElementType(), 4);
+        Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
+        // Now load value.
+        llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+
+        // Shuffle vector to get vec3.
+        V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
+                                        {0, 1, 2}, "extractVec");
+        return EmitFromMemory(V, Ty);
+      }
     }
   }
 
@@ -1456,24 +1458,25 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
                                         uint64_t TBAAOffset,
                                         bool isNontemporal) {
 
-  // Handle vectors differently to get better performance.
-  if (Ty->isVectorType()) {
-    llvm::Type *SrcTy = Value->getType();
-    auto *VecTy = cast<llvm::VectorType>(SrcTy);
-    // Handle vec3 special.
-    if (VecTy->getNumElements() == 3) {
-      // Our source is a vec3, do a shuffle vector to make it a vec4.
-      llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
-                                Builder.getInt32(2),
-                                llvm::UndefValue::get(Builder.getInt32Ty())};
-      llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
-      Value = Builder.CreateShuffleVector(Value,
-                                          llvm::UndefValue::get(VecTy),
-                                          MaskV, "extractVec");
-      SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
-    }
-    if (Addr.getElementType() != SrcTy) {
-      Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+  if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+    // Handle vectors differently to get better performance.
+    if (Ty->isVectorType()) {
+      llvm::Type *SrcTy = Value->getType();
+      auto *VecTy = cast<llvm::VectorType>(SrcTy);
+      // Handle vec3 special.
+      if (VecTy->getNumElements() == 3) {
+        // Our source is a vec3, do a shuffle vector to make it a vec4.
+        llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
+                                  Builder.getInt32(2),
+                                  llvm::UndefValue::get(Builder.getInt32Ty())};
+        llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+        Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
+                                            MaskV, "extractVec");
+        SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+      }
+      if (Addr.getElementType() != SrcTy) {
+        Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+      }
     }
   }
 
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index e9041b282f..8ee4d3f7be 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -3593,8 +3593,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   // vector to get a vec4, then a bitcast if the target type is different.
   if (NumElementsSrc == 3 && NumElementsDst != 3) {
     Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
-    Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
-                                       DstTy);
+
+    if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+      Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+                                         DstTy);
+    }
+
     Src->setName("astype");
     return Src;
   }
@@ -3603,9 +3607,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
   // to vec4 if the original type is not vec4, then a shuffle vector to
   // get a vec3.
   if (NumElementsSrc != 3 && NumElementsDst == 3) {
-    auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
-    Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
-                                       Vec4Ty);
+    if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+      auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
+      Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+                                         Vec4Ty);
+    }
+
     Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
     Src->setName("astype");
     return Src;
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 9138450181..271d1ca4f6 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -729,6 +729,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
     }
   }
 
+  Opts.PreserveVec3Type = Args.hasArg(OPT_fpreserve_vec3_type);
   Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
   Opts.XRayInstrumentFunctions = Args.hasArg(OPT_fxray_instrument);
   Opts.XRayInstructionThreshold =
diff --git a/test/CodeGenOpenCL/preserve_vec3.cl b/test/CodeGenOpenCL/preserve_vec3.cl
new file mode 100644
index 0000000000..6efbbb3d53
--- /dev/null
+++ b/test/CodeGenOpenCL/preserve_vec3.cl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type  | FileCheck %s
+
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+
+void kernel foo(global float3 *a, global float3 *b) {
+  // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a
+  // CHECK: store <3 x float> %[[LOAD_A]], <3 x float> addrspace(1)* %b
+  *b = *a;
+}
+
+void kernel float4_to_float3(global float3 *a, global float4 *b) {
+  // CHECK: %[[LOAD_A:.*]] = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
+  // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_A]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  // CHECK: store <3 x float> %[[ASTYPE:.*]], <3 x float> addrspace(1)* %a, align 16
+  *a = __builtin_astype(*b, float3);
+}
+
+void kernel float3_to_float4(global float3 *a, global float4 *b) {
+  // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a, align 16
+  // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  // CHECK: store <4 x float> %[[ASTYPE:.*]], <4 x float> addrspace(1)* %b, align 16
+  *b = __builtin_astype(*a, float4);
+}