[AMDGPU] gfx1010 wave32 clang support

author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Thu, 13 Jun 2019 23:47:59 +0000 (23:47 +0000)

committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Thu, 13 Jun 2019 23:47:59 +0000 (23:47 +0000)
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Thu, 13 Jun 2019 23:47:59 +0000 (23:47 +0000)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Thu, 13 Jun 2019 23:47:59 +0000 (23:47 +0000)
diff --git a/docs/ClangCommandLineReference.rst b/docs/ClangCommandLineReference.rst

index 5a34c9606f79f71912110f25108f4e28c37ceaa9..30ac27f95f9ddce3c3741f66da0709c9cb1ed6d8 100644 (file)
--- a/docs/ClangCommandLineReference.rst
+++ b/docs/ClangCommandLineReference.rst
@@ -2401,6 +2401,10 @@ AMDGPU
  CU wavefront execution mode is used if enabled and WGP wavefront execution mode
  is used if disabled (AMDGPU only)
  
+.. option:: -mwavefrontsize64, -mno-wavefrontsize64
+
+Wavefront size 64 is used if enabled and wavefront size 32 if disabled (AMDGPU only)
+
  .. option:: -mxnack, -mno-xnack
  
  Enable XNACK (AMDGPU only)
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td

index 90b6092b1baed84e6ff498d4726d6d7d2a4eff1a..65c1721daa8b8d78aa33e79c316aaad9b75257e1 100644 (file)
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -2216,6 +2216,11 @@ def mcumode : Flag<["-"], "mcumode">, Group<m_amdgpu_Features_Group>,
  def mno_cumode : Flag<["-"], "mno-cumode">, Group<m_amdgpu_Features_Group>,
    HelpText<"WGP wavefront execution mode is used (AMDGPU only)">;
  
+def mwavefrontsize64 : Flag<["-"], "mwavefrontsize64">,
+  Group<m_Group>, HelpText<"Wavefront size 64 is used">;
+def mno_wavefrontsize64 : Flag<["-"], "mno-wavefrontsize64">,
+  Group<m_Group>, HelpText<"Wavefront size 32 is used">;
+
  def faltivec : Flag<["-"], "faltivec">, Group<f_Group>, Flags<[DriverOption]>;
  def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>, Flags<[DriverOption]>;
  def maltivec : Flag<["-"], "maltivec">, Group<m_ppc_Features_Group>;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp

index 81f3e5664aa54cd0080b41614c3826df8c37196d..287e691870c27bcf8493fd320a6573378b866f31 100644 (file)
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -12736,11 +12736,27 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
    case AMDGPU::BI__builtin_amdgcn_uicmp:
    case AMDGPU::BI__builtin_amdgcn_uicmpl:
    case AMDGPU::BI__builtin_amdgcn_sicmp:
-  case AMDGPU::BI__builtin_amdgcn_sicmpl:
-    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
+  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
+    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
+
+    // FIXME-GFX10: How should 32 bit mask be handled?
+    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
+      { Builder.getInt64Ty(), Src0->getType() });
+    return Builder.CreateCall(F, { Src0, Src1, Src2 });
+  }
    case AMDGPU::BI__builtin_amdgcn_fcmp:
-  case AMDGPU::BI__builtin_amdgcn_fcmpf:
-    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
+  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
+    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
+
+    // FIXME-GFX10: How should 32 bit mask be handled?
+    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
+      { Builder.getInt64Ty(), Src0->getType() });
+    return Builder.CreateCall(F, { Src0, Src1, Src2 });
+  }
    case AMDGPU::BI__builtin_amdgcn_class:
    case AMDGPU::BI__builtin_amdgcn_classf:
    case AMDGPU::BI__builtin_amdgcn_classh:
diff --git a/lib/Driver/ToolChains/AMDGPU.cpp b/lib/Driver/ToolChains/AMDGPU.cpp

index 7f6ddabb2ac1deb3d5e55cd913aab5e5882d9450..df4e7ee202bfd6db0a928db50ef2b1201bb236fe 100644 (file)
--- a/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/lib/Driver/ToolChains/AMDGPU.cpp
@@ -41,6 +41,17 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
    if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
      D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
  
+  if (Args.getLastArg(options::OPT_mwavefrontsize64)) {
+    Features.push_back("-wavefrontsize16");
+    Features.push_back("-wavefrontsize32");
+    Features.push_back("+wavefrontsize64");
+  }
+  if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) {
+    Features.push_back("-wavefrontsize16");
+    Features.push_back("+wavefrontsize32");
+    Features.push_back("-wavefrontsize64");
+  }
+
    handleTargetFeaturesGroup(
      Args, Features, options::OPT_m_amdgpu_Features_Group);
  }
diff --git a/lib/Driver/ToolChains/HIP.cpp b/lib/Driver/ToolChains/HIP.cpp

index 0afd062d4faba6223bc74f4ac1fd723e0729e69e..a60485ab03b7e91d9ee9e09874e720cde6458fcf 100644 (file)
--- a/lib/Driver/ToolChains/HIP.cpp
+++ b/lib/Driver/ToolChains/HIP.cpp
@@ -316,15 +316,21 @@ void HIPToolChain::addClangTargetOptions(
      else
        FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
  
+    llvm::StringRef WaveFrontSizeBC;
+    if (stoi(GFXVersion) < 1000)
+      WaveFrontSizeBC = "oclc_wavefrontsize64_on.amdgcn.bc";
+    else
+      WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc";
+
      BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc",
                     "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc",
                     FlushDenormalControlBC,
                     "oclc_correctly_rounded_sqrt_on.amdgcn.bc",
-                   "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC});
+                   "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC,
+                   WaveFrontSizeBC});
    }
    for (auto Lib : BCLibs)
      addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib);
-
  }
  
  llvm::opt::DerivedArgList *
diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl

index 6b7ea52dab0bc442ef65bcd0cbf6007d0742bba6..bd7fe78fc96fb744b1440c25242cb063e141a4c6 100644 (file)
--- a/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -224,28 +224,28 @@ void test_lerp(global int* out, int a, int b, int c)
  }
  
  // CHECK-LABEL: @test_sicmp_i32
-// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
  void test_sicmp_i32(global ulong* out, int a, int b)
  {
    *out = __builtin_amdgcn_sicmp(a, b, 32);
  }
  
  // CHECK-LABEL: @test_uicmp_i32
-// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
  void test_uicmp_i32(global ulong* out, uint a, uint b)
  {
    *out = __builtin_amdgcn_uicmp(a, b, 32);
  }
  
  // CHECK-LABEL: @test_sicmp_i64
-// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 38)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 38)
  void test_sicmp_i64(global ulong* out, long a, long b)
  {
    *out = __builtin_amdgcn_sicmpl(a, b, 39-1);
  }
  
  // CHECK-LABEL: @test_uicmp_i64
-// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 35)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 35)
  void test_uicmp_i64(global ulong* out, ulong a, ulong b)
  {
    *out = __builtin_amdgcn_uicmpl(a, b, 30+5);
@@ -287,14 +287,14 @@ void test_readlane(global int* out, int a, int b)
  }
  
  // CHECK-LABEL: @test_fcmp_f32
-// CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 5)
+// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 5)
  void test_fcmp_f32(global ulong* out, float a, float b)
  {
    *out = __builtin_amdgcn_fcmpf(a, b, 5);
  }
  
  // CHECK-LABEL: @test_fcmp_f64
-// CHECK: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 6)
+// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f64(double %a, double %b, i32 6)
  void test_fcmp_f64(global ulong* out, double a, double b)
  {
    *out = __builtin_amdgcn_fcmp(a, b, 3+3);
diff --git a/test/Driver/amdgpu-features.c b/test/Driver/amdgpu-features.c

index 333f03a98670ad3d806ed20640bf8c27167eb18e..f2de25f6730ea54d36b34fa958e886bdaf99aa7c 100644 (file)
--- a/test/Driver/amdgpu-features.c
+++ b/test/Driver/amdgpu-features.c
@@ -24,6 +24,12 @@
  // RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-sram-ecc %s 2>&1 | FileCheck --check-prefix=NO-SRAM-ECC %s
  // NO-SRAM-ECC: "-target-feature" "-sram-ecc"
  
+// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s
+// WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "-wavefrontsize32" "-target-feature" "+wavefrontsize64"
+
+// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s
+// NO-WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "+wavefrontsize32" "-target-feature" "-wavefrontsize64"
+
  // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mcumode %s 2>&1 | FileCheck --check-prefix=CUMODE %s
  // CUMODE: "-target-feature" "+cumode"
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Thu, 13 Jun 2019 23:47:59 +0000 (23:47 +0000)
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Thu, 13 Jun 2019 23:47:59 +0000 (23:47 +0000)
docs/ClangCommandLineReference.rst		patch | blob | history
include/clang/Driver/Options.td		patch | blob | history
lib/CodeGen/CGBuiltin.cpp		patch | blob | history
lib/Driver/ToolChains/AMDGPU.cpp		patch | blob | history
lib/Driver/ToolChains/HIP.cpp		patch | blob | history
test/CodeGenOpenCL/builtins-amdgcn.cl		patch | blob | history
test/Driver/amdgpu-features.c		patch | blob | history