AMDGPU: Keep track of modifiers when converting v_mac to v_mad

author Matt Arsenault <Matthew.Arsenault@amd.com>

Sat, 11 Mar 2017 05:40:40 +0000 (05:40 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Sat, 11 Mar 2017 05:40:40 +0000 (05:40 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 11 Mar 2017 05:40:40 +0000 (05:40 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 11 Mar 2017 05:40:40 +0000 (05:40 +0000)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index 1833b324915e9cdee13872a681cf3aea4f88181a..63524d22a34cce099906af984988e24681dab309 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1766,20 +1766,26 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
  
    const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
    const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
+  const MachineOperand *Src0Mods =
+    getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
    const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+  const MachineOperand *Src1Mods =
+    getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
    const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
+  const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
  
    return BuildMI(*MBB, MI, MI.getDebugLoc(),
                   get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
        .add(*Dst)
-      .addImm(0) // Src0 mods
+      .addImm(Src0Mods ? Src0Mods->getImm() : 0)
        .add(*Src0)
-      .addImm(0) // Src1 mods
+      .addImm(Src1Mods ? Src1Mods->getImm() : 0)
        .add(*Src1)
        .addImm(0) // Src mods
        .add(*Src2)
-      .addImm(0)  // clamp
-      .addImm(0); // omod
+      .addImm(Clamp ? Clamp->getImm() : 0)
+      .addImm(Omod ? Omod->getImm() : 0);
  }
  
  // It's not generally safe to move VALU instructions across these since it will
diff --git a/test/CodeGen/AMDGPU/clamp-modifier.ll b/test/CodeGen/AMDGPU/clamp-modifier.ll

index 186bd349ecc44ec316277d442929e0096569d9dc..c3a7d5e14d87cc216f456088e978abd42fc98092 100644 (file)
--- a/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -168,6 +168,23 @@ define amdgpu_kernel void @v_clamp_add_src_f64(double addrspace(1)* %out, double
    ret void
  }
  
+; GCN-LABEL: {{^}}v_clamp_mac_to_mad:
+; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @v_clamp_mac_to_mad(float addrspace(1)* %out, float addrspace(1)* %aptr, float %a) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %b = load float, float addrspace(1)* %gep0
+
+  %mul = fmul float %a, %a
+  %add = fadd float %mul, %b
+  %max = call float @llvm.maxnum.f32(float %add, float 0.0)
+  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
+  %res = fadd float %clamp, %b
+  store float %res, float addrspace(1)* %out.gep
+  ret void
+}
+
  declare i32 @llvm.amdgcn.workitem.id.x() #1
  declare float @llvm.fabs.f32(float) #1
  declare float @llvm.floor.f32(float) #1
diff --git a/test/CodeGen/AMDGPU/omod.ll b/test/CodeGen/AMDGPU/omod.ll

index d48956f534ed5e83d691530f7f88ffe6cd10accf..3fd7b13fcc586ccb307d27f3490123da7e899074 100644 (file)
--- a/test/CodeGen/AMDGPU/omod.ll
+++ b/test/CodeGen/AMDGPU/omod.ll
@@ -250,6 +250,17 @@ define amdgpu_ps void @v_omod_div2_f16_no_denormals(half %a) #3 {
    ret void
  }
  
+; GCN-LABEL: {{^}}v_omod_mac_to_mad:
+; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} mul:2{{$}}
+define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
+  %mul = fmul float %a, %a
+  %add = fadd float %mul, %b
+  %mad = fmul float %add, 2.0
+  %res = fmul float %mad, %b
+  store float %res, float addrspace(1)* undef
+  ret void
+}
+
  declare i32 @llvm.amdgcn.workitem.id.x() #1
  declare float @llvm.fabs.f32(float) #1
  declare float @llvm.floor.f32(float) #1
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 11 Mar 2017 05:40:40 +0000 (05:40 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 11 Mar 2017 05:40:40 +0000 (05:40 +0000)
lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/clamp-modifier.ll		patch \| blob \| history
test/CodeGen/AMDGPU/omod.ll		patch \| blob \| history