Add support for VSX FMA single-precision instructions to the PPC back end

author Nemanja Ivanovic <nemanja.i.ibm@gmail.com>

Fri, 29 May 2015 17:13:25 +0000 (17:13 +0000)

committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com>

Fri, 29 May 2015 17:13:25 +0000 (17:13 +0000)
author Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Fri, 29 May 2015 17:13:25 +0000 (17:13 +0000)
committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Fri, 29 May 2015 17:13:25 +0000 (17:13 +0000)
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp

index 0b8e23c4ebf8f3fbd59d937541fd510c80803b56..a561d5b1190a540f508d2da484002d0f53fd73f8 100644 (file)
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -148,6 +148,9 @@ class PPCFastISel final : public FastISel {
      bool isVSFRCRegister(unsigned Register) const {
        return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
      }
+    bool isVSSRCRegister(unsigned Register) const {
+      return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
+    }
      bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                      bool isZExt, unsigned DestReg);
      bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
@@ -503,8 +506,11 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
  
    // If this is a potential VSX load with an offset of 0, a VSX indexed load can
    // be used.
+  bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
    bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
-  if (IsVSFRC && (Opc == PPC::LFD) && 
+  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
+  bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
+  if ((Is32VSXLoad || Is64VSXLoad) &&
        (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
        (Addr.Offset == 0)) {
      UseOffset = false;
@@ -518,7 +524,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    // into a RegBase.
    if (Addr.BaseType == Address::FrameIndexBase) {
      // VSX only provides an indexed load.
-    if (IsVSFRC && Opc == PPC::LFD) return false;
+    if (Is32VSXLoad || Is64VSXLoad) return false;
  
      MachineMemOperand *MMO =
        FuncInfo.MF->getMachineMemOperand(
@@ -532,7 +538,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    // Base reg with offset in range.
    } else if (UseOffset) {
      // VSX only provides an indexed load.
-    if (IsVSFRC && Opc == PPC::LFD) return false;
+    if (Is32VSXLoad || Is64VSXLoad) return false;
  
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
        .addImm(Addr.Offset).addReg(Addr.Base.Reg);
@@ -555,7 +561,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
        case PPC::LWA:    Opc = PPC::LWAX;    break;
        case PPC::LWA_32: Opc = PPC::LWAX_32; break;
        case PPC::LD:     Opc = PPC::LDX;     break;
-      case PPC::LFS:    Opc = PPC::LFSX;    break;
+      case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
        case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
      }
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
@@ -636,9 +642,12 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  
    // If this is a potential VSX store with an offset of 0, a VSX indexed store
    // can be used.
+  bool IsVSSRC = isVSSRCRegister(SrcReg);
    bool IsVSFRC = isVSFRCRegister(SrcReg);
-  if (IsVSFRC && (Opc == PPC::STFD) && 
-      (Addr.BaseType != Address::FrameIndexBase) && UseOffset && 
+  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
+  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
+  if ((Is32VSXStore || Is64VSXStore) &&
+      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
        (Addr.Offset == 0)) {
      UseOffset = false;
    }
@@ -648,7 +657,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
    // into a RegBase.
    if (Addr.BaseType == Address::FrameIndexBase) {
      // VSX only provides an indexed store.
-    if (IsVSFRC && Opc == PPC::STFD) return false;
+    if (Is32VSXStore || Is64VSXStore) return false;
  
      MachineMemOperand *MMO =
        FuncInfo.MF->getMachineMemOperand(
@@ -665,7 +674,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
    // Base reg with offset in range.
    } else if (UseOffset) {
      // VSX only provides an indexed store.
-    if (IsVSFRC && Opc == PPC::STFD) return false;
+    if (Is32VSXStore || Is64VSXStore) return false;
      
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
@@ -684,7 +693,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
        case PPC::STH8: Opc = PPC::STHX8; break;
        case PPC::STW8: Opc = PPC::STWX8; break;
        case PPC::STD:  Opc = PPC::STDX;  break;
-      case PPC::STFS: Opc = PPC::STFSX; break;
+      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
        case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      }
  
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td

index 9685bac2aebbb86b4ebf2424317c2ecfaeef2d38..d08b80871f3eb570d0fc55fac98f3b1a473307e8 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1078,6 +1078,82 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                          "xssubsp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+
+  // FMA Instructions
+  let BaseName = "XSMADDASP" in {
+  let isCommutable = 1 in
+  def XSMADDASP : XX3Form<60, 1,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSMADDMSP : XX3Form<60, 9,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
+
+  let BaseName = "XSMSUBASP" in {
+  let isCommutable = 1 in
+  def XSMSUBASP : XX3Form<60, 17,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fma f32:$XA, f32:$XB,
+                                              (fneg f32:$XTi)))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSMSUBMSP : XX3Form<60, 25,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
+
+  let BaseName = "XSNMADDASP" in {
+  let isCommutable = 1 in
+  def XSNMADDASP : XX3Form<60, 129,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                                                    f32:$XTi)))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSNMADDMSP : XX3Form<60, 137,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
+
+  let BaseName = "XSNMSUBASP" in {
+  let isCommutable = 1 in
+  def XSNMSUBASP : XX3Form<60, 145,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                                                    (fneg f32:$XTi))))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSNMSUBMSP : XX3Form<60, 153,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
  } // AddedComplexity = 400
  } // HasP8Vector
  
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll

index ab5251b2a554f2fab4d1796ad65bd15aa11ae669..9cfef398edfd29a7e92bba63b86c5d8747d81999 100644 (file)
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,9 +1,12 @@
  ; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
  ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
  
  declare double @dummy1(double) #0
  declare double @dummy2(double, double) #0
  declare double @dummy3(double, double, double) #0
+declare float @dummy4(float, float) #0
  
  define double @test_FMADD1(double %A, double %B, double %C) {
         %D = fmul double %A, %B         ; <double> [#uses=1]
@@ -126,3 +129,83 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
  ; CHECK-VSX: fnmsubs
  ; CHECK-VSX-NEXT: blr
  }
+
+define float @test_XSMADDMSP(float %A, float %B, float %C) {
+       %D = fmul float %A, %B          ; <float> [#uses=1]
+       %E = fadd float %C, %D          ; <float> [#uses=1]
+       ret float %E
+; CHECK-P8-LABEL: test_XSMADDMSP:
+; CHECK-P8: xsmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMSUBMSP(float %A, float %B, float %C) {
+       %D = fmul float %A, %B          ; <float> [#uses=1]
+       %E = fsub float %D, %C          ; <float> [#uses=1]
+       ret float %E
+; CHECK-P8-LABEL: test_XSMSUBMSP:
+; CHECK-P8: xsmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMADDASP(float %A, float %B, float %C, float %D) {
+       %E = fmul float %A, %B  ; <float> [#uses=2]
+       %F = fadd float %E, %C  ; <float> [#uses=1]
+       %G = fsub float %E, %D  ; <float> [#uses=1]
+       %H = call float @dummy4(float %F, float %G)      ; <float> [#uses=1]
+       ret float %H
+; CHECK-P8-LABEL: test_XSMADDASP:
+; CHECK-P8: xsmaddasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSMSUBASP(float %A, float %B, float %C, float %D) {
+       %E = fmul float %A, %B  ; <float> [#uses=2]
+       %F = fsub float %E, %C  ; <float> [#uses=1]
+       %G = fsub float %E, %D  ; <float> [#uses=1]
+       %H = call float @dummy4(float %F, float %G)      ; <float> [#uses=1]
+       ret float %H
+; CHECK-P8-LABEL: test_XSMSUBASP:
+; CHECK-P8: xsmsubasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSNMADDMSP(float %A, float %B, float %C) {
+       %D = fmul float %A, %B          ; <float> [#uses=1]
+       %E = fadd float %D, %C          ; <float> [#uses=1]
+       %F = fsub float -0.000000e+00, %E               ; <float> [#uses=1]
+       ret float %F
+; CHECK-P8-LABEL: test_XSNMADDMSP:
+; CHECK-P8: xsnmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMSUBMSP(float %A, float %B, float %C) {
+       %D = fmul float %A, %B          ; <float> [#uses=1]
+       %E = fsub float %D, %C          ; <float> [#uses=1]
+       %F = fsub float -0.000000e+00, %E               ; <float> [#uses=1]
+       ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBMSP:
+; CHECK-P8: xsnmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMADDASP(float %A, float %B, float %C) {
+       %D = fmul float %A, %B          ; <float> [#uses=1]
+       %E = fadd float %D, %C          ; <float> [#uses=1]
+       %F = fsub float -0.000000e+00, %E               ; <float> [#uses=1]
+       %H = call float @dummy4(float %E, float %F)      ; <float> [#uses=1]
+       ret float %F
+; CHECK-P8-LABEL: test_XSNMADDASP:
+; CHECK-P8: xsnmaddasp
+}
+
+define float @test_XSNMSUBASP(float %A, float %B, float %C) {
+       %D = fmul float %A, %B          ; <float> [#uses=1]
+       %E = fsub float %D, %C          ; <float> [#uses=1]
+       %F = fsub float -0.000000e+00, %E               ; <float> [#uses=1]
+       %H = call float @dummy4(float %E, float %F)      ; <float> [#uses=1]
+       ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBASP:
+; CHECK-P8: xsnmsubasp
+}
diff --git a/test/CodeGen/PowerPC/vsx-fma-sp.ll b/test/CodeGen/PowerPC/vsx-fma-sp.ll

new file mode 100644 (file)

index 0000000..1c3e457
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-fma-sp.ll
@@ -0,0 +1,167 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
+define void @test1sp(float %a, float %b, float %c, float %e, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  ret void
+
+; CHECK-LABEL: @test1sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 4
+; CHECK-DAG: stxsspx 3, 0, 7
+; CHECK-DAG: stxsspx 1, 7, [[C1]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test1sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 7
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 1, 7, [[C1]]
+; CHECK-FISL: blr
+}
+
+define void @test2sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  %2 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+  store float %2, float* %arrayidx2, align 4
+  ret void
+
+; CHECK-LABEL: @test2sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+; CHECK-DAG: stxsspx 3, 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test2sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+define void @test3sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 3
+  store float %2, float* %arrayidx1, align 4
+  %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+  store float %3, float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx3, align 4
+  ret void
+
+; CHECK-LABEL: @test3sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 12
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: li [[C3:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+
+; CHECK-DAG: xsmaddmsp 3, 2, 4
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 3, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test3sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: fmr 4, [[F1]]
+; CHECK-FISL-DAG: xsmaddasp 4, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL: blr
+}
+
+define void @test4sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+  %arrayidx3 = getelementptr inbounds float, float* %d, i64 3
+  store float %2, float* %arrayidx3, align 4
+  %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx4 = getelementptr inbounds float, float* %d, i64 2
+  store float %3, float* %arrayidx4, align 4
+  ret void
+
+; CHECK-LABEL: @test4sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 12
+; CHECK-DAG: xsmaddasp 4, 2, 3
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test4sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr [[F1]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+declare float @llvm.fma.f32(float, float, float) #0
diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt

index 04b2eeb0dd2aa3d0bd8a142d5dd3fdb30e08137f..6f4ba6f6b9ac22f2cc823e200717fd4e08afdb1c 100644 (file)
--- a/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@@ -90,6 +90,12 @@
  # CHECK: xsmaddmdp 7, 63, 27
  0xf0 0xff 0xd9 0x4c
  
+# CHECK: xsmaddasp 7, 63, 27
+0xf0 0xff 0xd8 0x0c
+
+# CHECK: xsmaddmsp 7, 63, 27
+0xf0 0xff 0xd8 0x4c
+
  # CHECK: xsmaxdp 7, 63, 27
  0xf0 0xff 0xdd 0x04
  
@@ -102,6 +108,12 @@
  # CHECK: xsmsubmdp 7, 63, 27
  0xf0 0xff 0xd9 0xcc
  
+# CHECK: xsmsubasp 7, 63, 27
+0xf0 0xff 0xd8 0x8c
+
+# CHECK: xsmsubmsp 7, 63, 27
+0xf0 0xff 0xd8 0xcc
+
  # CHECK: xsmulsp 7, 63, 27
  0xf0 0xff 0xd8 0x84
  
@@ -126,6 +138,18 @@
  # CHECK: xsnmsubmdp 7, 63, 27
  0xf0 0xff 0xdd 0xcc
  
+# CHECK: xsnmaddasp 7, 63, 27
+0xf0 0xff 0xdc 0x0c
+
+# CHECK: xsnmaddmsp 7, 63, 27
+0xf0 0xff 0xdc 0x4c
+
+# CHECK: xsnmsubasp 7, 63, 27
+0xf0 0xff 0xdc 0x8c
+
+# CHECK: xsnmsubmsp 7, 63, 27
+0xf0 0xff 0xdc 0xcc
+
  # CHECK: xsrdpi 7, 27
  0xf0 0xe0 0xd9 0x24
  
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s

index 773fc9eef6d16b6aa4c644d46507daf555ba2b3b..352fc5173800037ae499d3126c1084449a1e25bd 100644 (file)
--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@@ -95,6 +95,12 @@
  # CHECK-BE: xsmaddmdp 7, 63, 27                # encoding: [0xf0,0xff,0xd9,0x4c]
  # CHECK-LE: xsmaddmdp 7, 63, 27                # encoding: [0x4c,0xd9,0xff,0xf0]
              xsmaddmdp 7, 63, 27
+# CHECK-BE: xsmaddasp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x0c]
+# CHECK-LE: xsmaddasp 7, 63, 27                # encoding: [0x0c,0xd8,0xff,0xf0]
+            xsmaddasp 7, 63, 27
+# CHECK-BE: xsmaddmsp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x4c]
+# CHECK-LE: xsmaddmsp 7, 63, 27                # encoding: [0x4c,0xd8,0xff,0xf0]
+            xsmaddmsp 7, 63, 27
  # CHECK-BE: xsmaxdp 7, 63, 27                  # encoding: [0xf0,0xff,0xdd,0x04]
  # CHECK-LE: xsmaxdp 7, 63, 27                  # encoding: [0x04,0xdd,0xff,0xf0]
              xsmaxdp 7, 63, 27
@@ -107,6 +113,12 @@
  # CHECK-BE: xsmsubmdp 7, 63, 27                # encoding: [0xf0,0xff,0xd9,0xcc]
  # CHECK-LE: xsmsubmdp 7, 63, 27                # encoding: [0xcc,0xd9,0xff,0xf0]
              xsmsubmdp 7, 63, 27
+# CHECK-BE: xsmsubasp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x8c]
+# CHECK-LE: xsmsubasp 7, 63, 27                # encoding: [0x8c,0xd8,0xff,0xf0]
+            xsmsubasp 7, 63, 27
+# CHECK-BE: xsmsubmsp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0xcc]
+# CHECK-LE: xsmsubmsp 7, 63, 27                # encoding: [0xcc,0xd8,0xff,0xf0]
+            xsmsubmsp 7, 63, 27
  # CHECK-BE: xsmulsp 7, 63, 27                  # encoding: [0xf0,0xff,0xd8,0x84]
  # CHECK-LE: xsmulsp 7, 63, 27                  # encoding: [0x84,0xd8,0xff,0xf0]
              xsmulsp 7, 63, 27
@@ -131,6 +143,18 @@
  # CHECK-BE: xsnmsubmdp 7, 63, 27               # encoding: [0xf0,0xff,0xdd,0xcc]
  # CHECK-LE: xsnmsubmdp 7, 63, 27               # encoding: [0xcc,0xdd,0xff,0xf0]
              xsnmsubmdp 7, 63, 27
+# CHECK-BE: xsnmaddasp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x0c]
+# CHECK-LE: xsnmaddasp 7, 63, 27               # encoding: [0x0c,0xdc,0xff,0xf0]
+            xsnmaddasp 7, 63, 27
+# CHECK-BE: xsnmaddmsp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x4c]
+# CHECK-LE: xsnmaddmsp 7, 63, 27               # encoding: [0x4c,0xdc,0xff,0xf0]
+            xsnmaddmsp 7, 63, 27
+# CHECK-BE: xsnmsubasp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x8c]
+# CHECK-LE: xsnmsubasp 7, 63, 27               # encoding: [0x8c,0xdc,0xff,0xf0]
+            xsnmsubasp 7, 63, 27
+# CHECK-BE: xsnmsubmsp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0xcc]
+# CHECK-LE: xsnmsubmsp 7, 63, 27               # encoding: [0xcc,0xdc,0xff,0xf0]
+            xsnmsubmsp 7, 63, 27
  # CHECK-BE: xsrdpi 7, 27                       # encoding: [0xf0,0xe0,0xd9,0x24]
  # CHECK-LE: xsrdpi 7, 27                       # encoding: [0x24,0xd9,0xe0,0xf0]
              xsrdpi 7, 27
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Fri, 29 May 2015 17:13:25 +0000 (17:13 +0000)
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Fri, 29 May 2015 17:13:25 +0000 (17:13 +0000)
lib/Target/PowerPC/PPCFastISel.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCInstrVSX.td		patch \| blob \| history
test/CodeGen/PowerPC/fma.ll		patch \| blob \| history
test/CodeGen/PowerPC/vsx-fma-sp.ll	[new file with mode: 0644]	patch \| blob
test/MC/Disassembler/PowerPC/vsx.txt		patch \| blob \| history
test/MC/PowerPC/vsx.s		patch \| blob \| history