unsigned NumResults;
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_ld_a:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
- : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_b:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
- : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
- : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
- : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
NumResults = 8;
break;
default:
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
- : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_st_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
- : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
break;
default:
llvm_unreachable("Unexpected builtin ID.");
return Result;
}
- // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf)
- // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf>
+ // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
+ // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
bool Satf = SatfArg.getSExtValue();
// clang-format off
-#define MMA_VARIANTS(type) {{ \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \
+#define MMA_VARIANTS(type) {{ \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type##_satfinite \
}}
// clang-format on
__device__ void nvvm_wmma(int *src, int *dst,
float *fsrc, float *fdst,
int ldm) {
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
__hmma_m16n16k16_ld_a(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
__hmma_m16n16k16_ld_a(dst, src+1, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
__hmma_m16n16k16_ld_b(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
__hmma_m16n16k16_ld_b(dst, src+2, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
}