From 350f0bf9a4723ed2b33a28f6af5cafd8383ed82c Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 21 Mar 2018 21:55:02 +0000 Subject: [PATCH] [NVPTX] Make tensor shape part of WMMA intrinsic's name. This is needed for the upcoming implementation of the new 8x32x16 and 32x8x16 variants of WMMA instructions introduced in CUDA 9.1. Differential Revision: https://reviews.llvm.org/D44719 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@328158 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGBuiltin.cpp | 46 +++++++-------- test/CodeGen/builtins-nvptx-sm_70.cu | 88 ++++++++++++++-------------- 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 996e5e7cd8..d3ea1f2868 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -10515,23 +10515,23 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, unsigned NumResults; switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_load_a_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; NumResults = 8; break; case NVPTX::BI__hmma_m16n16k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_load_b_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; NumResults = 8; break; case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_load_c_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; NumResults = 4; break; case NVPTX::BI__hmma_m16n16k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_load_c_f32_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; NumResults = 8; break; default: @@ -10566,13 +10566,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, // for some reason nvcc builtins use _c_. switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_store_d_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; NumResults = 4; break; case NVPTX::BI__hmma_m16n16k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_store_d_f32_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; break; default: llvm_unreachable("Unexpected builtin ID."); @@ -10591,8 +10591,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Result; } - // BI__hmma_m16n16k16_mma_(d, a, b, c, layout, satf) - // --> Intrinsic::nvvm_wmma_mma_sync + // BI__hmma_m16n16k16_mma_(d, a, b, c, layout, satf) --> + // Intrinsic::nvvm_wmma_m16n16k16_mma_sync case NVPTX::BI__hmma_m16n16k16_mma_f16f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f32: @@ -10613,15 +10613,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, bool Satf = SatfArg.getSExtValue(); // clang-format off -#define MMA_VARIANTS(type) {{ \ - Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \ - Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \ - Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \ - Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \ - Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \ +#define MMA_VARIANTS(type) {{ \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type##_satfinite \ }} // clang-format on diff --git a/test/CodeGen/builtins-nvptx-sm_70.cu b/test/CodeGen/builtins-nvptx-sm_70.cu index 09e5b6ba7a..1e9133b3ac 100644 --- a/test/CodeGen/builtins-nvptx-sm_70.cu +++ b/test/CodeGen/builtins-nvptx-sm_70.cu @@ -22,145 +22,145 @@ typedef unsigned long long uint64_t; __device__ void nvvm_wmma(int *src, int *dst, float *fsrc, float *fdst, int ldm) { - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}} __hmma_m16n16k16_ld_a(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}} __hmma_m16n16k16_ld_a(dst, src+1, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}} __hmma_m16n16k16_ld_b(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}} __hmma_m16n16k16_ld_b(dst, src+2, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); } -- 2.40.0