From: Artem Belevich Date: Wed, 21 Mar 2018 21:55:02 +0000 (+0000) Subject: [NVPTX] Make tensor shape part of WMMA intrinsic's name. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=350f0bf9a4723ed2b33a28f6af5cafd8383ed82c;p=clang [NVPTX] Make tensor shape part of WMMA intrinsic's name. This is needed for the upcoming implementation of the new 8x32x16 and 32x8x16 variants of WMMA instructions introduced in CUDA 9.1. Differential Revision: https://reviews.llvm.org/D44719 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@328158 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 996e5e7cd8..d3ea1f2868 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -10515,23 +10515,23 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, unsigned NumResults; switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_load_a_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; NumResults = 8; break; case NVPTX::BI__hmma_m16n16k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_load_b_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; NumResults = 8; break; case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_load_c_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; NumResults = 4; break; case NVPTX::BI__hmma_m16n16k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_load_c_f32_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; NumResults = 8; break; default: @@ -10566,13 +10566,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, // for some reason nvcc builtins use _c_. switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_store_d_f16_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; NumResults = 4; break; case NVPTX::BI__hmma_m16n16k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_store_d_f32_row_stride; + IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride + : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; break; default: llvm_unreachable("Unexpected builtin ID."); @@ -10591,8 +10591,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Result; } - // BI__hmma_m16n16k16_mma_(d, a, b, c, layout, satf) - // --> Intrinsic::nvvm_wmma_mma_sync + // BI__hmma_m16n16k16_mma_(d, a, b, c, layout, satf) --> + // Intrinsic::nvvm_wmma_m16n16k16_mma_sync case NVPTX::BI__hmma_m16n16k16_mma_f16f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f32: @@ -10613,15 +10613,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, bool Satf = SatfArg.getSExtValue(); // clang-format off -#define MMA_VARIANTS(type) {{ \ - Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \ - Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \ - Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \ - Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \ - Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \ +#define MMA_VARIANTS(type) {{ \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type, \ + Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type##_satfinite \ }} // clang-format on diff --git a/test/CodeGen/builtins-nvptx-sm_70.cu b/test/CodeGen/builtins-nvptx-sm_70.cu index 09e5b6ba7a..1e9133b3ac 100644 --- a/test/CodeGen/builtins-nvptx-sm_70.cu +++ b/test/CodeGen/builtins-nvptx-sm_70.cu @@ -22,145 +22,145 @@ typedef unsigned long long uint64_t; __device__ void nvvm_wmma(int *src, int *dst, float *fsrc, float *fdst, int ldm) { - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}} __hmma_m16n16k16_ld_a(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}} __hmma_m16n16k16_ld_a(dst, src+1, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}} __hmma_m16n16k16_ld_b(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}} __hmma_m16n16k16_ld_b(dst, src+2, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32 // expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32 + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32 // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); - // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32.satfinite + // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite // expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); }