From: Matt Arsenault Date: Wed, 10 Jul 2019 16:09:26 +0000 (+0000) Subject: AMDGPU: Serialize mode from MachineFunctionInfo X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5cf57efaf1d288205b90837a2b658ded64826782;p=llvm AMDGPU: Serialize mode from MachineFunctionInfo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365653 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7414519aee1..672e49184a5 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1144,5 +1144,8 @@ bool GCNTargetMachine::parseMachineFunctionInfo( MFI->ArgInfo.WorkItemIDZ))) return true; + MFI->Mode.IEEE = YamlMFI.Mode.IEEE; + MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp; + return false; } diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index e70a51bfd3d..a3f6caaacc8 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -418,7 +418,8 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)), FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), - ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)) {} + ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), + Mode(MFI.getMode()) {} void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 2cbca8930a6..a8928dacf77 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -232,6 +232,31 @@ template <> struct MappingTraits<SIArgumentInfo> { } }; +// Default to default mode for default calling convention.
+struct SIMode { + bool IEEE = true; + bool DX10Clamp = true; + + SIMode() = default; + + + SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) { + IEEE = Mode.IEEE; + DX10Clamp = Mode.DX10Clamp; + } + + bool operator ==(const SIMode Other) const { + return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp; + } +}; + +template <> struct MappingTraits<SIMode> { + static void mapping(IO &YamlIO, SIMode &Mode) { + YamlIO.mapOptional("ieee", Mode.IEEE, true); + YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true); + } +}; + struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { uint64_t ExplicitKernArgSize = 0; unsigned MaxKernArgAlign = 0; @@ -247,6 +272,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { StringValue StackPtrOffsetReg = "$sp_reg"; Optional<SIArgumentInfo> ArgInfo; + SIMode Mode; SIMachineFunctionInfo() = default; SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, @@ -275,6 +301,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> { YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg, StringValue("$sp_reg")); YamlIO.mapOptional("argumentInfo", MFI.ArgInfo); + YamlIO.mapOptional("mode", MFI.Mode, SIMode()); } }; diff --git a/test/CodeGen/AMDGPU/omod-nsz-flag.mir b/test/CodeGen/AMDGPU/omod-nsz-flag.mir index b53813a3d91..c2bf002a92d 100644 --- a/test/CodeGen/AMDGPU/omod-nsz-flag.mir +++ b/test/CodeGen/AMDGPU/omod-nsz-flag.mir @@ -1,20 +1,5 @@ # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s ---- | - define amdgpu_ps void @omod_inst_flag_nsz_src() { - unreachable - } - - define amdgpu_ps void @omod_inst_flag_nsz_result() { - unreachable - } - - define amdgpu_ps void @omod_inst_flag_nsz_both() { - unreachable - } - -... - --- # FIXME: Is it OK to fold omod for this?
@@ -24,6 +9,9 @@ # GCN-NEXT: S_ENDPGM 0, implicit %1 name: omod_inst_flag_nsz_src tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false body: | bb.0: @@ -42,6 +30,9 @@ body: | name: omod_inst_flag_nsz_result tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false body: | bb.0: @@ -60,6 +51,9 @@ body: | name: omod_inst_flag_nsz_both tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false body: | bb.0: diff --git a/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 73d0855f612..4523af65645 100644 --- a/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -22,6 +22,9 @@ # FULL-NEXT: workGroupIDX: { reg: '$sgpr6' } # FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } # FULL-NEXT: workItemIDX: { reg: '$vgpr0' } +# FULL-NEXT: mode: +# FULL-NEXT: ieee: true +# FULL-NEXT: dx10-clamp: true # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -85,6 +88,9 @@ body: | # FULL-NEXT: argumentInfo: # FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } # FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' } +# FULL-NEXT: mode: +# FULL-NEXT: ieee: true +# FULL-NEXT: dx10-clamp: true # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -117,6 +123,9 @@ body: | # FULL-NEXT: argumentInfo: # FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } # FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' } +# FULL-NEXT: mode: +# FULL-NEXT: ieee: true +# FULL-NEXT: dx10-clamp: true # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -150,6 +159,9 @@ body: | # FULL-NEXT: argumentInfo: # FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } # FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' } +# FULL-NEXT: mode: +# FULL-NEXT: ieee: true +# FULL-NEXT: dx10-clamp: true # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -214,3 +226,20 @@ body: | 
S_ENDPGM 0 ... + +--- +# ALL-LABEL: name: parse_mode +# ALL: mode: +# ALL-NEXT: ieee: false +# ALL-NEXT: dx10-clamp: false +name: parse_mode +machineFunctionInfo: + mode: + ieee: false + dx10-clamp: false + +body: | + bb.0: + S_ENDPGM 0 + +... diff --git a/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index 0fdbce5208d..79d3d82cc84 100644 --- a/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -25,6 +25,9 @@ ; CHECK-NEXT: workGroupIDX: { reg: '$sgpr6' } ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } ; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' } +; CHECK-NEXT: mode: +; CHECK-NEXT: ieee: true +; CHECK-NEXT: dx10-clamp: true ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { %gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0 @@ -48,6 +51,9 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: argumentInfo: ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr3' } ; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' } +; CHECK-NEXT: mode: +; CHECK-NEXT: ieee: false +; CHECK-NEXT: dx10-clamp: true ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { ret void @@ -69,6 +75,9 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { ; CHECK-NEXT: argumentInfo: ; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' } +; CHECK-NEXT: mode: +; CHECK-NEXT: ieee: true +; CHECK-NEXT: dx10-clamp: true ; CHECK-NEXT: body: define void @function() { ret void @@ -90,9 +99,40 @@ define void @function() { ; CHECK-NEXT: argumentInfo: ; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' } +; CHECK-NEXT: mode: +; CHECK-NEXT: ieee: true +; 
CHECK-NEXT: dx10-clamp: true ; CHECK-NEXT: body: define void @function_nsz() #0 { ret void } +; CHECK-LABEL: {{^}}name: function_dx10_clamp_off +; CHECK: mode: +; CHECK-NEXT: ieee: true +; CHECK-NEXT: dx10-clamp: false +define void @function_dx10_clamp_off() #1 { + ret void +} + +; CHECK-LABEL: {{^}}name: function_ieee_off +; CHECK: mode: +; CHECK-NEXT: ieee: false +; CHECK-NEXT: dx10-clamp: true +define void @function_ieee_off() #2 { + ret void +} + +; CHECK-LABEL: {{^}}name: function_ieee_off_dx10_clamp_off +; CHECK: mode: +; CHECK-NEXT: ieee: false +; CHECK-NEXT: dx10-clamp: false +define void @function_ieee_off_dx10_clamp_off() #3 { + ret void +} + attributes #0 = { "no-signed-zeros-fp-math" = "true" } + +attributes #1 = { "amdgpu-dx10-clamp" = "false" } +attributes #2 = { "amdgpu-ieee" = "false" } +attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" }