From: Matt Arsenault Date: Mon, 23 Jan 2017 22:31:03 +0000 (+0000) Subject: AMDGPU: Combine fp16/fp64 subtarget features X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f3a691f0b081077b13e8d462f841aaaf58d20211;p=llvm AMDGPU: Combine fp16/fp64 subtarget features The same control register controls both, and both are set to the same defaults. Keep the old names around as aliases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292837 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 46cd112ad49..0c5f9e72b52 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -206,12 +206,6 @@ def FeatureDPP : SubtargetFeature<"dpp", // Subtarget Features (options and debugging) //===------------------------------------------------------------===// -def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals", - "FP16Denormals", - "true", - "Enable half precision denormal handling" ->; - // Some instructions do not support denormals despite this flag. Using // fp32 denormals also causes instructions to run at the double // precision rate for the device. @@ -221,13 +215,30 @@ def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", "Enable single precision denormal handling" >; -def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", - "FP64Denormals", +// Denormal handling for fp64 and fp16 is controlled by the same +// config register when fp16 supported. +// TODO: Do we need a separate f16 setting when not legal? 
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals", + "FP64FP16Denormals", "true", - "Enable double precision denormal handling", + "Enable double and half precision denormal handling", [FeatureFP64] >; +def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", + "FP64FP16Denormals", + "true", + "Enable double and half precision denormal handling", + [FeatureFP64, FeatureFP64FP16Denormals] +>; + +def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals", + "FP64FP16Denormals", + "true", + "Enable half precision denormal handling", + [FeatureFP64FP16Denormals] +>; + def FeatureFPExceptions : SubtargetFeature<"fp-exceptions", "FPExceptions", "true", diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 4e8529b57ab..4848b3b86bd 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -41,9 +41,10 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // for SI has the unhelpful behavior that it unsets everything else if you // disable it. - SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,"); + SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. FullFS += "+flat-for-global,+unaligned-buffer-access,"; + FullFS += FS; ParseSubtargetFeatures(GPU, FullFS); @@ -52,9 +53,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - FP16Denormals = false; + FP64FP16Denormals = false; FP32Denormals = false; - FP64Denormals = false; } // Set defaults if needed. 
@@ -78,9 +78,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FastFMAF32(false), HalfRate64Ops(false), - FP16Denormals(false), FP32Denormals(false), - FP64Denormals(false), + FP64FP16Denormals(false), FPExceptions(false), FlatForGlobal(false), UnalignedScratchAccess(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 26c4c34c988..c8414f97808 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -81,9 +81,8 @@ protected: bool HalfRate64Ops; // Dynamially set bits that enable features. - bool FP16Denormals; bool FP32Denormals; - bool FP64Denormals; + bool FP64FP16Denormals; bool FPExceptions; bool FlatForGlobal; bool UnalignedScratchAccess; @@ -282,7 +281,7 @@ public: unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const; bool hasFP16Denormals() const { - return FP16Denormals; + return FP64FP16Denormals; } bool hasFP32Denormals() const { @@ -290,7 +289,7 @@ public: } bool hasFP64Denormals() const { - return FP64Denormals; + return FP64FP16Denormals; } bool hasFPExceptions() const { diff --git a/test/CodeGen/AMDGPU/default-fp-mode.ll b/test/CodeGen/AMDGPU/default-fp-mode.ll index 28d065e3b32..0969fc1caff 100644 --- a/test/CodeGen/AMDGPU/default-fp-mode.ll +++ b/test/CodeGen/AMDGPU/default-fp-mode.ll @@ -54,6 +54,34 @@ define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* % ret void } +; GCN-LABEL: {{^}}test_f16_f64_denormals: +; GCN: FloatMode: 192 +; GCN: IeeeMode: 1 +define void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 { + store half 0.0, half addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_no_f16_f64_denormals: +; GCN: FloatMode: 0 +; GCN: IeeeMode: 1 +define void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 { + store half 0.0, half addrspace(1)* %out0 + store double 0.0, 
double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_f32_f16_f64_denormals: +; GCN: FloatMode: 240 +; GCN: IeeeMode: 1 +define void @test_f32_f16_f64_denormals(half addrspace(1)* %out0, float addrspace(1)* %out1, double addrspace(1)* %out2) #8 { + store half 0.0, half addrspace(1)* %out0 + store float 0.0, float addrspace(1)* %out1 + store double 0.0, double addrspace(1)* %out2 + ret void +} + ; GCN-LABEL: {{^}}kill_gs_const: ; GCN: IeeeMode: 0 define amdgpu_gs void @kill_gs_const() { @@ -87,4 +115,7 @@ attributes #1 = { nounwind "target-cpu"="fiji" } attributes #2 = { nounwind "target-features"="+fp64-denormals" } attributes #3 = { nounwind "target-features"="+fp32-denormals" } attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" } -attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" } +attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } +attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" } +attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" } +attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" } diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index aa0d6cb1067..bdfb0050e62 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -69,10 +69,10 @@ define void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 { ret void } -; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16: -; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}} ; GCN: buffer_store_short [[REG]] -define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 { +define void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 { 
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF) store half %canonicalized, half addrspace(1)* %out ret void @@ -87,10 +87,10 @@ define void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* % ret void } -; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16: -; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}} ; GCN: buffer_store_short [[REG]] -define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 { +define void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 { %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF) store half %canonicalized, half addrspace(1)* %out ret void @@ -282,7 +282,7 @@ define void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) } ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16: -; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}} ; GCN: buffer_store_dword [[REG]] define void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 { %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> ) @@ -300,7 +300,7 @@ define void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspa } ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16: -; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}} ; GCN: buffer_store_dword [[REG]] define void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 { %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> ) @@ -382,5 +382,5 @@ define void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* % attributes #0 = { nounwind readnone } attributes #1 = { nounwind } -attributes #2 = { nounwind 
"target-features"="-fp16-denormals,-fp16-denormals" } -attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" } +attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" } +attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" } diff --git a/test/CodeGen/AMDGPU/fcanonicalize.ll b/test/CodeGen/AMDGPU/fcanonicalize.ll index 981d88dfe94..bbe8a1e0143 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -347,5 +347,5 @@ define void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) # attributes #0 = { nounwind readnone } attributes #1 = { nounwind } -attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" } -attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" } +attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } +attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" } diff --git a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll index 10acae092e9..84c35e1aa03 100644 --- a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll +++ b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -1,5 +1,7 @@ ; XUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s + ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't ; make add an instruction if the fadd has more than one use. 
@@ -115,7 +117,8 @@ define void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, float %y) #0 { ; VI: v_cndmask_b32_e32 ; VI: v_add_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}| ; VI: v_mul_f16_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} -; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0 +; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0 +; VI-DENORM: v_fma_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0 define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half @@ -136,7 +139,10 @@ define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x. ; GCN-LABEL: {{^}}multiple_use_fadd_fmac_f16: ; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}} -; GCN-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0 + +; VI-FLUSH-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0 +; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}} + ; GCN-DAG: buffer_store_short [[MUL2]] ; GCN-DAG: buffer_store_short [[MAD]] ; GCN: s_endpgm @@ -153,7 +159,10 @@ define void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i16 zeroext %x. ; GCN-LABEL: {{^}}multiple_use_fadd_fmad_f16: ; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}| -; GCN-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}} + +; VI-FLUSH-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}} +; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}} + ; GCN-DAG: buffer_store_short [[MUL2]] ; GCN-DAG: buffer_store_short [[MAD]] ; GCN: s_endpgm @@ -170,8 +179,12 @@ define void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i16 zeroext %x. 
} ; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad_f16: -; GCN: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}} -; GCN: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}} +; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}} +; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}} + +; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}} +; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}} + define void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half diff --git a/test/CodeGen/AMDGPU/fmuladd.f16.ll b/test/CodeGen/AMDGPU/fmuladd.f16.ll index 500b00bdcf8..477ae3da507 100644 --- a/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ b/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s - -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s + +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare half @llvm.fmuladd.f16(half, half, half) #1 diff --git a/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/test/CodeGen/AMDGPU/hsa-fp-mode.ll index 51d6aee25f4..977667cbe6c 100644 --- a/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ b/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -62,7 +62,7 @@ 
define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* % attributes #0 = { nounwind "target-cpu"="kaveri" } attributes #1 = { nounwind "target-cpu"="fiji" } -attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" } -attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" } -attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" } -attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" } +attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" } +attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-fp16-denormals" } +attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" } +attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index af7bd270394..ee199518ad7 100644 --- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s -; RUN: llc -march=amdgcn -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s +; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=VI -check-prefix=VI-FLUSH %s +; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s declare half @llvm.fmuladd.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) diff --git a/test/CodeGen/AMDGPU/v_mac.ll b/test/CodeGen/AMDGPU/v_mac.ll index 16aed5928b0..4dc8d9608c1 100644 --- a/test/CodeGen/AMDGPU/v_mac.ll +++ b/test/CodeGen/AMDGPU/v_mac.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s ; GCN-LABEL: {{^}}mac_vvv: ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}} @@ -250,8 +251,8 @@ bb: ; FIXME: How is this not folded? 
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, 0x3c00 -; VI: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]] -; VI: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0 +; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]] +; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0 define void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll index ecd5b01545d..ebec10e6382 100644 --- a/test/CodeGen/AMDGPU/v_mac_f16.ll +++ b/test/CodeGen/AMDGPU/v_mac_f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}mac_f16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] @@ -604,5 +604,5 @@ entry: ret void } -attributes #0 = {"unsafe-fp-math"="false"} -attributes #1 = {"unsafe-fp-math"="true"} +attributes #0 = { nounwind "unsafe-fp-math"="false" } +attributes #1 = { nounwind "unsafe-fp-math"="true" } diff --git a/test/CodeGen/AMDGPU/v_madak_f16.ll b/test/CodeGen/AMDGPU/v_madak_f16.ll index fd5ad3e3d60..553e13dd074 100644 --- a/test/CodeGen/AMDGPU/v_madak_f16.ll +++ b/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}madak_f16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]