MFI->ArgInfo.WorkItemIDZ)))
return true;
+ MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
+ MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+
return false;
}
ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
- ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)) {}
+ ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
+ Mode(MFI.getMode()) {}
// Hands this object, together with YamlIO, to
// MappingTraits<SIMachineFunctionInfo>::mapping.
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  SIMachineFunctionInfo &Self = *this;
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, Self);
}
};
+/// Mode flags (IEEE, DX10Clamp) stored in the YAML form of the machine
+/// function info. Both flags start out true, which is the mode used for the
+/// default calling convention.
+struct SIMode {
+  bool IEEE = true;
+  bool DX10Clamp = true;
+
+  SIMode() = default;
+
+  /// Copies the IEEE and DX10Clamp flags out of \p Mode.
+  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode)
+      : IEEE(Mode.IEEE), DX10Clamp(Mode.DX10Clamp) {}
+
+  /// Two modes are equal only when both flags agree.
+  bool operator ==(const SIMode Other) const {
+    if (IEEE != Other.IEEE)
+      return false;
+    return DX10Clamp == Other.DX10Clamp;
+  }
+};
+
+/// YAML mapping for SIMode: maps the optional keys "ieee" and "dx10-clamp"
+/// onto the two flags, passing true as the default value for each.
+template <> struct MappingTraits<SIMode> {
+  static void mapping(IO &YamlIO, SIMode &Mode) {
+    // The same default is handed to mapOptional for both keys.
+    const bool EnabledByDefault = true;
+    YamlIO.mapOptional("ieee", Mode.IEEE, EnabledByDefault);
+    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, EnabledByDefault);
+  }
+};
+
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint64_t ExplicitKernArgSize = 0;
unsigned MaxKernArgAlign = 0;
StringValue StackPtrOffsetReg = "$sp_reg";
Optional<SIArgumentInfo> ArgInfo;
+ SIMode Mode;
SIMachineFunctionInfo() = default;
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
StringValue("$sp_reg"));
YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+ YamlIO.mapOptional("mode", MFI.Mode, SIMode());
}
};
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
---- |
- define amdgpu_ps void @omod_inst_flag_nsz_src() {
- unreachable
- }
-
- define amdgpu_ps void @omod_inst_flag_nsz_result() {
- unreachable
- }
-
- define amdgpu_ps void @omod_inst_flag_nsz_both() {
- unreachable
- }
-
-...
-
---
# FIXME: Is it OK to fold omod for this?
# GCN-NEXT: S_ENDPGM 0, implicit %1
name: omod_inst_flag_nsz_src
tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
body: |
bb.0:
name: omod_inst_flag_nsz_result
tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
body: |
bb.0:
name: omod_inst_flag_nsz_both
tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
body: |
bb.0:
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
S_ENDPGM 0
...
+
+---
+# ALL-LABEL: name: parse_mode
+# ALL: mode:
+# ALL-NEXT: ieee: false
+# ALL-NEXT: dx10-clamp: false
+name: parse_mode
+machineFunctionInfo:
+ mode:
+ ieee: false
+ dx10-clamp: false
+
+body: |
+ bb.0:
+ S_ENDPGM 0
+
+...
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr6' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr3' }
; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: false
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
ret void
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define void @function() {
ret void
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define void @function_nsz() #0 {
ret void
}
+; CHECK-LABEL: {{^}}name: function_dx10_clamp_off
+; CHECK: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: false
+define void @function_dx10_clamp_off() #1 {
+ ret void
+}
+
+; CHECK-LABEL: {{^}}name: function_ieee_off
+; CHECK: mode:
+; CHECK-NEXT: ieee: false
+; CHECK-NEXT: dx10-clamp: true
+define void @function_ieee_off() #2 {
+ ret void
+}
+
+; CHECK-LABEL: {{^}}name: function_ieee_off_dx10_clamp_off
+; CHECK: mode:
+; CHECK-NEXT: ieee: false
+; CHECK-NEXT: dx10-clamp: false
+define void @function_ieee_off_dx10_clamp_off() #3 {
+ ret void
+}
+
attributes #0 = { "no-signed-zeros-fp-math" = "true" }
+
+attributes #1 = { "amdgpu-dx10-clamp" = "false" }
+attributes #2 = { "amdgpu-ieee" = "false" }
+attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" }