From 94ec1e3c4f12bbb35094e8ef3e37a682cb2c927d Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Tue, 4 Oct 2016 11:25:52 +0000
Subject: [PATCH] [Power9] Exploit D-Form VSX Scalar memory ops that target
 full VSX register set

This patch corresponds to review:

The newly added VSX D-Form (register + offset) memory ops target the upper half
of the VSX register set. The existing ones target the lower half. In order to
unify these and have the ability to target all the VSX registers using D-Form
operations, this patch defines Pseudo-ops for the loads/stores which are
expanded post-RA. The expansion then choses the correct opcode based on the
register that was allocated for the operation.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283212 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCInstrInfo.cpp           | 51 +++++++++++--
 lib/Target/PowerPC/PPCInstrVSX.td             | 24 ++++++
 lib/Target/PowerPC/PPCRegisterInfo.cpp        | 12 +++
 test/CodeGen/PowerPC/VSX-DForm-Scalars.ll     | 73 +++++++++++++++++++
 test/CodeGen/PowerPC/bitcasts-direct-move.ll  |  1 +
 test/CodeGen/PowerPC/float-to-int.ll          | 35 ++++++++-
 test/CodeGen/PowerPC/i64-to-float.ll          | 32 +++++++-
 test/CodeGen/PowerPC/mcm-12.ll                | 15 +++-
 test/CodeGen/PowerPC/mcm-4.ll                 | 30 +++++++-
 .../PowerPC/ppc64-align-long-double.ll        |  1 +
 test/CodeGen/PowerPC/ppc64le-aggregates.ll    |  8 +-
 test/CodeGen/PowerPC/pr25157-peephole.ll      |  8 ++
 test/CodeGen/PowerPC/pr25157.ll               |  4 +
 test/CodeGen/PowerPC/swaps-le-6.ll            |  8 +-
 test/CodeGen/PowerPC/vsx-spill.ll             | 42 ++++++++++-
 test/CodeGen/PowerPC/vsx_insert_extract_le.ll | 21 +++---
 test/CodeGen/PowerPC/vsx_scalar_ld_st.ll      | 14 +++-
 17 files changed, 340 insertions(+), 39 deletions(-)
 create mode 100644 test/CodeGen/PowerPC/VSX-DForm-Scalars.ll

diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 750daec4a56..2e0b9355f82 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -999,13 +999,15 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                        FrameIdx));
     NonRI = true;
   } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf64 : PPC::STXSDX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc))
                                        .addReg(SrcReg,
                                                getKillRegState(isKill)),
                                        FrameIdx));
     NonRI = true;
   } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX))
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf32 : PPC::STXSSPX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc))
                                        .addReg(SrcReg,
                                                getKillRegState(isKill)),
                                        FrameIdx));
@@ -1128,12 +1130,14 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
                                        FrameIdx));
     NonRI = true;
   } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
-                                       FrameIdx));
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf64 : PPC::LXSDX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc),
+                                               DestReg), FrameIdx));
     NonRI = true;
   } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg),
-                                       FrameIdx));
+    unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf32 : PPC::LXSSPX;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc),
+                                               DestReg), FrameIdx));
     NonRI = true;
   } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
     assert(Subtarget.isDarwin() &&
@@ -1882,6 +1886,41 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
         .addReg(Reg);
     return true;
   }
+  case PPC::DFLOADf32:
+  case PPC::DFLOADf64:
+  case PPC::DFSTOREf32:
+  case PPC::DFSTOREf64: {
+    assert(Subtarget.hasP9Vector() &&
+           "Invalid D-Form Pseudo-ops on non-P9 target.");
+    unsigned UpperOpcode, LowerOpcode;
+    switch (MI.getOpcode()) {
+    case PPC::DFLOADf32:
+      UpperOpcode = PPC::LXSSP;
+      LowerOpcode = PPC::LFS;
+      break;
+    case PPC::DFLOADf64:
+      UpperOpcode = PPC::LXSD;
+      LowerOpcode = PPC::LFD;
+      break;
+    case PPC::DFSTOREf32:
+      UpperOpcode = PPC::STXSSP;
+      LowerOpcode = PPC::STFS;
+      break;
+    case PPC::DFSTOREf64:
+      UpperOpcode = PPC::STXSD;
+      LowerOpcode = PPC::STFD;
+      break;
+    }
+    unsigned TargetReg = MI.getOperand(0).getReg();
+    unsigned Opcode;
+    if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
+        (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
+      Opcode = LowerOpcode;
+    else
+      Opcode = UpperOpcode;
+    MI.setDesc(get(Opcode));
+    return true;
+  }
   }
   return false;
 }
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 9fd43f1f239..5206d5747c4 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -118,6 +118,7 @@ let Uses = [RM] in {
 
   // Load indexed instructions
   let mayLoad = 1 in {
+    let CodeSize = 3 in
     def LXSDX : XX1Form<31, 588,
                         (outs vsfrc:$XT), (ins memrr:$src),
                         "lxsdx $XT, $src", IIC_LdStLFD,
@@ -142,6 +143,7 @@ let Uses = [RM] in {
 
   // Store indexed instructions
   let mayStore = 1 in {
+    let CodeSize = 3 in
     def STXSDX : XX1Form<31, 716,
                         (outs), (ins vsfrc:$XT, memrr:$dst),
                         "stxsdx $XT, $dst", IIC_LdStSTFD,
@@ -1150,6 +1152,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
 
   // VSX scalar loads introduced in ISA 2.07
   let mayLoad = 1 in {
+    let CodeSize = 3 in
     def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
                          "lxsspx $XT, $src", IIC_LdStLFD,
                          [(set f32:$XT, (load xoaddr:$src))]>;
@@ -1163,6 +1166,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
 
   // VSX scalar stores introduced in ISA 2.07
   let mayStore = 1 in {
+    let CodeSize = 3 in
     def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
                           "stxsspx $XT, $dst", IIC_LdStSTFD,
                           [(store f32:$XT, xoaddr:$dst)]>;
@@ -2245,6 +2249,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   //===--------------------------------------------------------------------===//
   // Vector/Scalar Load/Store Instructions
 
+  // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+  // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
   let mayLoad = 1 in {
   // Load Vector
   def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
@@ -2285,6 +2291,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   def LXVWSX  : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
   } // mayLoad
 
+  // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+  // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
   let mayStore = 1 in {
   // Store Vector
   def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
@@ -2556,6 +2564,22 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
             (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
   def : Pat<(f64 (PPCVexts f64:$A, 2)),
             (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+  let isPseudo = 1 in {
+    def DFLOADf32  : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
+                            "#DFLOADf32",
+                            [(set f32:$XT, (load iaddr:$src))]>;
+    def DFLOADf64  : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
+                            "#DFLOADf64",
+                            [(set f64:$XT, (load iaddr:$src))]>;
+    def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+                            "#DFSTOREf32",
+                            [(store f32:$XT, iaddr:$dst)]>;
+    def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+                            "#DFSTOREf64",
+                            [(store f64:$XT, iaddr:$dst)]>;
+  }
+  def : Pat<(f64 (extloadf32 iaddr:$src)),
+            (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
 } // end HasP9Vector, AddedComplexity
 
 let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index facdb36e815..e4920140286 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -78,6 +78,18 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
   ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
   ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
   ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+
+  // VSX
+  ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
+  ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX;
+  ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX;
+  ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX;
+  ImmToIdxMap[PPC::LXV] = PPC::LXVX;
+  ImmToIdxMap[PPC::LXSD] = PPC::LXSDX;
+  ImmToIdxMap[PPC::LXSSP] = PPC::LXSSPX;
+  ImmToIdxMap[PPC::STXV] = PPC::STXVX;
+  ImmToIdxMap[PPC::STXSD] = PPC::STXSDX;
+  ImmToIdxMap[PPC::STXSSP] = PPC::STXSSPX;
 }
 
 /// getPointerRegClass - Return the register class to use to hold pointers.
diff --git a/test/CodeGen/PowerPC/VSX-DForm-Scalars.ll b/test/CodeGen/PowerPC/VSX-DForm-Scalars.ll
new file mode 100644
index 00000000000..94412d6d8ab
--- /dev/null
+++ b/test/CodeGen/PowerPC/VSX-DForm-Scalars.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs | FileCheck %s
+
+@gd = external local_unnamed_addr global [500 x double], align 8
+@gf = external local_unnamed_addr global [500 x float], align 4
+
+; Function Attrs: nounwind
+define double @_Z7getLXSDddddddddddddd(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m) local_unnamed_addr #0 {
+entry:
+  %0 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 10), align 8
+  %1 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 17), align 8
+  %2 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 87), align 8
+  %3 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 97), align 8
+  %4 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 77), align 8
+  store double %3, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 122), align 8
+  %add = fadd double %a, %b
+  %add1 = fadd double %add, %c
+  %add2 = fadd double %add1, %d
+  %add3 = fadd double %add2, %e
+  %add4 = fadd double %add3, %f
+  %add5 = fadd double %add4, %g
+  %add6 = fadd double %add5, %h
+  %add7 = fadd double %add6, %i
+  %add8 = fadd double %add7, %j
+  %add9 = fadd double %add8, %k
+  %add10 = fadd double %add9, %l
+  %add11 = fadd double %add10, %m
+  %add12 = fadd double %add11, %0
+  %add13 = fadd double %add12, %1
+  %add14 = fadd double %add13, %2
+  %add15 = fadd double %add14, %3
+  %add16 = fadd double %add15, %4
+  %call = tail call double @_Z7getLXSDddddddddddddd(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m)
+  %add17 = fadd double %add16, %call
+  ret double %add17
+; CHECK-LABEL: _Z7getLXSDddddddddddddd
+; CHECK: lxsd [[LD:[0-9]+]], 776(3)
+; CHECK: stxsd [[LD]], 976(3)
+}
+
+; Function Attrs: nounwind
+define float @_Z8getLXSSPfffffffffffff(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m) local_unnamed_addr #0 {
+entry:
+  %0 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 10), align 4
+  %1 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 17), align 4
+  %2 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 87), align 4
+  %3 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 97), align 4
+  %4 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 77), align 4
+  store float %3, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 122), align 4
+  %add = fadd float %a, %b
+  %add1 = fadd float %add, %c
+  %add2 = fadd float %add1, %d
+  %add3 = fadd float %add2, %e
+  %add4 = fadd float %add3, %f
+  %add5 = fadd float %add4, %g
+  %add6 = fadd float %add5, %h
+  %add7 = fadd float %add6, %i
+  %add8 = fadd float %add7, %j
+  %add9 = fadd float %add8, %k
+  %add10 = fadd float %add9, %l
+  %add11 = fadd float %add10, %m
+  %add12 = fadd float %add11, %0
+  %add13 = fadd float %add12, %1
+  %add14 = fadd float %add13, %2
+  %add15 = fadd float %add14, %3
+  %add16 = fadd float %add15, %4
+  %call = tail call float @_Z8getLXSSPfffffffffffff(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m)
+  %add17 = fadd float %add16, %call
+  ret float %add17
+; CHECK-LABEL: _Z8getLXSSPfffffffffffff
+; CHECK: lxssp [[LD:[0-9]+]], 388(3)
+; CHECK: stxssp [[LD]], 488(3)
+}
diff --git a/test/CodeGen/PowerPC/bitcasts-direct-move.ll b/test/CodeGen/PowerPC/bitcasts-direct-move.ll
index af6ffc755c4..79da5cb6874 100644
--- a/test/CodeGen/PowerPC/bitcasts-direct-move.ll
+++ b/test/CodeGen/PowerPC/bitcasts-direct-move.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
 ; RUN:  --check-prefix=CHECK-P7
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
 
 define signext i32 @f32toi32(float %a) {
 entry:
diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll
index 012a021091b..80e6b75ced3 100644
--- a/test/CodeGen/PowerPC/float-to-int.ll
+++ b/test/CodeGen/PowerPC/float-to-int.ll
@@ -1,6 +1,11 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=g5
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -mattr=-direct-move | FileCheck -check-prefix=CHECK-P9 %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -19,6 +24,12 @@ define i64 @foo(float %a) nounwind {
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo
+; CHECK-P9: xscvdpsxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i64 @foo2(double %a) nounwind {
@@ -36,6 +47,12 @@ define i64 @foo2(double %a) nounwind {
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo2
+; CHECK-P9: xscvdpsxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i64 @foo3(float %a) nounwind {
@@ -53,6 +70,12 @@ define i64 @foo3(float %a) nounwind {
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo3
+; CHECK-P9: xscvdpuxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i64 @foo4(double %a) nounwind {
@@ -70,6 +93,12 @@ define i64 @foo4(double %a) nounwind {
 ; CHECK-VSX: stxsdx [[REG]],
 ; CHECK-VSX: ld 3,
 ; CHECK-VSX: blr
+
+; CHECK-LABEL-P9: @foo4
+; CHECK-P9: xscvdpuxds [[REG:[0-9]+]], 1
+; CHECK-P9: stfd [[REG]],
+; CHECK-P9: ld 3,
+; CHECK-P9: blr
 }
 
 define i32 @goo(float %a) nounwind {
diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll
index d8d28223ada..e139579bf48 100644
--- a/test/CodeGen/PowerPC/i64-to-float.ll
+++ b/test/CodeGen/PowerPC/i64-to-float.ll
@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -mattr=-direct-move | FileCheck %s -check-prefix=CHECK-P9
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -19,6 +23,12 @@ entry:
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: fcfids 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @foo
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvsxdsp 1, [[REG]]
+; CHECK-P9: blr
 }
 
 define double @goo(i64 %a) nounwind {
@@ -37,6 +47,12 @@ entry:
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: xscvsxddp 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @goo
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvsxddp 1, [[REG]]
+; CHECK-P9: blr
 }
 
 define float @foou(i64 %a) nounwind {
@@ -55,6 +71,12 @@ entry:
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: fcfidus 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @foou
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvuxdsp 1, [[REG]]
+; CHECK-P9: blr
 }
 
 define double @goou(i64 %a) nounwind {
@@ -73,5 +95,11 @@ entry:
 ; CHECK-VSX: lxsdx [[REG:[0-9]+]],
 ; CHECK-VSX: xscvuxddp 1, [[REG]]
 ; CHECK-VSX: blr
+
+; CHECK-P9: @goou
+; CHECK-P9: std 3,
+; CHECK-P9: lfd [[REG:[0-9]+]],
+; CHECK-P9: xscvuxddp 1, [[REG]]
+; CHECK-P9: blr
 }
 
diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll
index 26255eedf8e..c07dbd9d2d3 100644
--- a/test/CodeGen/PowerPC/mcm-12.ll
+++ b/test/CodeGen/PowerPC/mcm-12.ll
@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium -mattr=-vsx < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium \
+; RUN:   -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium \
+; RUN:   -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O1 -code-model=medium < %s | \
+; RUN:   FileCheck -check-prefix=CHECK-P9 %s
 
 ; Test peephole optimization for medium code model (32-bit TOC offsets)
 ; for loading a value from the constant pool (TOC-relative).
@@ -24,3 +28,10 @@ entry:
 ; CHECK-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
 ; CHECK-VSX: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l
 ; CHECK-VSX: lxsdx {{[0-9]+}}, 0, [[REG1]]
+
+; CHECK-P9: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK-P9: .quad 4562098671269285104
+; CHECK-P9-LABEL: test_double_const:
+; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK-P9: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l
+; CHECK-P9: lfd {{[0-9]+}}, 0([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll
index 34fa4d1093f..180977d8932 100644
--- a/test/CodeGen/PowerPC/mcm-4.ll
+++ b/test/CodeGen/PowerPC/mcm-4.ll
@@ -1,7 +1,15 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium \
+; RUN:   -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large \
+; RUN:   -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -code-model=medium \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-P9 %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -code-model=large \
+; RUN:   -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-P9 %s
 
 ; Test correct code generation for medium and large code model
 ; for loading a value from the constant pool (TOC-relative).
@@ -41,3 +49,17 @@ entry:
 ; LARGE-VSX: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha
 ; LARGE-VSX: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]])
 ; LARGE-VSX: lxsdx {{[0-9]+}}, 0, [[REG2]]
+
+; MEDIUM-P9: [[VAR:[a-z0-9A-Z_.]+]]:
+; MEDIUM-P9: .quad 4562098671269285104
+; MEDIUM-P9-LABEL: test_double_const:
+; MEDIUM-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; MEDIUM-P9: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM-P9: lfd {{[0-9]+}}, 0([[REG2]])
+
+; LARGE-P9: [[VAR:[a-z0-9A-Z_.]+]]:
+; LARGE-P9: .quad 4562098671269285104
+; LARGE-P9-LABEL: test_double_const:
+; LARGE-P9: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE-P9: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]])
+; LARGE-P9: lfd {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index c6c7e2f5b6a..c3cccd5b293 100644
--- a/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck %s
 
 ; Verify internal alignment of long double in a struct.  The double
 ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain
diff --git a/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
index 65e94a1face..25b3e5d8933 100644
--- a/test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec -mattr=-vsx | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mattr=+altivec -mattr=-vsx | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr8 \
+; RUN:   -mattr=+altivec -mattr=-vsx | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mattr=+altivec \
+; RUN:   -mattr=-vsx | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr9 \
+; RUN:   -mattr=-direct-move -mattr=+altivec | FileCheck %s
 
 ; Currently VSX support is disabled for this test because we generate lxsdx
 ; instead of lfd, and stxsdx instead of stfd.  That is a poor choice when we
diff --git a/test/CodeGen/PowerPC/pr25157-peephole.ll b/test/CodeGen/PowerPC/pr25157-peephole.ll
index c5bd49b492c..7f959add00f 100644
--- a/test/CodeGen/PowerPC/pr25157-peephole.ll
+++ b/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -1,4 +1,6 @@
 ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN:   %s --check-prefix=CHECK-P9
 
 ; Verify peephole simplification of splats and swaps.  Bugpoint-reduced
 ; test from Eric Schweitz.
@@ -59,3 +61,9 @@ L.LB38_2452:
 ; CHECK: xxspltd
 ; CHECK: stxvd2x
 ; CHECK-NOT: xxswapd
+
+; CHECK-P9-LABEL: @aercalc_
+; CHECK-P9: lfs
+; CHECK-P9: xxspltd
+; CHECK-P9: stxvx
+; CHECK-P9-NOT: xxswapd
diff --git a/test/CodeGen/PowerPC/pr25157.ll b/test/CodeGen/PowerPC/pr25157.ll
index 7137d675a74..ee9a0034f2c 100644
--- a/test/CodeGen/PowerPC/pr25157.ll
+++ b/test/CodeGen/PowerPC/pr25157.ll
@@ -1,4 +1,6 @@
 ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN:   --check-prefix=CHECK-P9 %s
 
 ; Verify correct generation of an lxsspx rather than an invalid optimization
 ; to lxvdsx.  Bugpoint-reduced test from Eric Schweitz.
@@ -56,3 +58,5 @@ L.LB38_2452:
 
 ; CHECK-LABEL: @aercalc_
 ; CHECK: lxsspx
+; CHECK-P9-LABEL: @aercalc_
+; CHECK-P9: lfs
diff --git a/test/CodeGen/PowerPC/swaps-le-6.ll b/test/CodeGen/PowerPC/swaps-le-6.ll
index da6605e494f..d573441f2cc 100644
--- a/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -34,9 +34,9 @@ entry:
 
 ; CHECK-P9-LABEL: @bar0
 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
+; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
 ; CHECK-P9: stxvx [[REG5]]
 
 define void @bar1() {
@@ -57,8 +57,8 @@ entry:
 
 ; CHECK-P9-LABEL: @bar1
 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
+; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
 ; CHECK-P9: stxvx [[REG5]]
 
diff --git a/test/CodeGen/PowerPC/vsx-spill.ll b/test/CodeGen/PowerPC/vsx-spill.ll
index 388029ae4ce..4dec0daecd9 100644
--- a/test/CodeGen/PowerPC/vsx-spill.ll
+++ b/test/CodeGen/PowerPC/vsx-spill.ll
@@ -1,7 +1,14 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-REG %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | \
+; RUN:   FileCheck -check-prefix=CHECK-FISL %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-P9-REG %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -fast-isel -O0 < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-P9-FISL %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -21,6 +28,15 @@ entry:
 ; CHECK-FISL: stxsdx 1, 1, 0
 ; CHECK-FISL: blr
 
+; CHECK-P9-REG: @foo1
+; CHECK-P9-REG: xxlor [[R1:[0-9]+]], 1, 1
+; CHECK-P9-REG: xxlor 1, [[R1]], [[R1]]
+; CHECK-P9-REG: blr
+
+; CHECK-P9-FISL: @foo1
+; CHECK-P9-FISL: stfd 31, -8(1)
+; CHECK-P9-FISL: blr
+
 return:                                           ; preds = %entry
   ret double %a
 }
@@ -42,6 +58,17 @@ entry:
 ; CHECK-FISL: lxsdx [[R1]], [[R1]], 0
 ; CHECK-FISL: blr
 
+; CHECK-P9-REG: @foo2
+; CHECK-P9-REG: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1
+; CHECK-P9-REG: {{xxlor|xsadddp}} 1, [[R1]], [[R1]]
+; CHECK-P9-REG: blr
+
+; CHECK-P9-FISL: @foo2
+; CHECK-P9-FISL: xsadddp [[R1:[0-9]+]], 1, 1
+; CHECK-P9-FISL: stfd [[R1]], [[OFF:[0-9\-]+]](1)
+; CHECK-P9-FISL: lfd [[R1]], [[OFF]](1)
+; CHECK-P9-FISL: blr
+
 return:                                           ; preds = %entry
   ret double %b
 }
@@ -57,6 +84,15 @@ entry:
 ; CHECK: xsadddp 1, [[R1]], [[R1]]
 ; CHECK: blr
 
+; CHECK-P9-REG-LABEL: foo3
+; CHECK-P9-REG: stfd 1, [[OFF:[0-9\-]+]](1)
+; CHECK-P9-REG: lfd [[FPR:[0-9]+]], [[OFF]](1)
+; CHECK-P9-REG: xsadddp 1, [[FPR]], [[FPR]]
+
+; CHECK-P9-FISL-LABEL: foo3
+; CHECK-P9-FISL: stfd 1, [[OFF:[0-9\-]+]](1)
+; CHECK-P9-FISL: lfd [[FPR:[0-9]+]], [[OFF]](1)
+; CHECK-P9-FISL: xsadddp 1, [[FPR]], [[FPR]]
 return:                                           ; preds = %entry
   %b = fadd double %a, %a
   ret double %b
diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 53878883033..09bf6830416 100644
--- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -22,10 +22,10 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
 ; CHECK: xxpermdi 34, 0, 1, 1
 
 ; CHECK-P9-LABEL: testi0
-; CHECK-P9: lxsdx 0, 0, 4
-; CHECK-P9: lxvx 1, 0, 3
-; CHECK-P9: xxspltd 0, 0, 0
-; CHECK-P9: xxpermdi 34, 1, 0, 1
+; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
+; CHECK-P9: lxvx [[REG2:[0-9]+]], 0, 3
+; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
+; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1
 }
 
 define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
@@ -42,10 +42,10 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
 ; CHECK: xxmrgld 34, 1, 0
 
 ; CHECK-P9-LABEL: testi1
-; CHECK-P9: lxsdx 0, 0, 4
-; CHECK-P9: lxvx 1, 0, 3
-; CHECK-P9: xxspltd 0, 0, 0
-; CHECK-P9: xxmrgld 34, 0, 1
+; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
+; CHECK-P9: lxvx [[REG2:[0-9]+]], 0, 3
+; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
+; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]]
 }
 
 define double @teste0(<2 x double>* %p1) {
@@ -57,7 +57,7 @@ define double @teste0(<2 x double>* %p1) {
 ; CHECK: lxvd2x 1, 0, 3
 
 ; CHECK-P9-LABEL: teste0
-; CHECK-P9: lxsdx 1, 0, 3
+; CHECK-P9: lfd 1, 0(3)
 }
 
 define double @teste1(<2 x double>* %p1) {
@@ -70,6 +70,5 @@ define double @teste1(<2 x double>* %p1) {
 ; CHECK: xxswapd 1, 0
 
 ; CHECK-P9-LABEL: teste1
-; CHECK-P9: li 4, 8
-; CHECK-P9: lxsdx 1, 3, 4
+; CHECK-P9: lfd 1, 8(3)
 }
diff --git a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
index 65eb6089120..7da2ea27c18 100644
--- a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
+++ b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -1,5 +1,9 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -mattr=-direct-move | FileCheck %s -check-prefix=CHECK-P9
 
 @d = common global double 0.000000e+00, align 8
 @f = common global float 0.000000e+00, align 4
@@ -121,6 +125,9 @@ entry:
 ; CHECK-LABEL: @dblToFloat
 ; CHECK: lxsdx [[REGLD5:[0-9]+]],
 ; CHECK: stxsspx [[REGLD5]],
+; CHECK-P9-LABEL: @dblToFloat
+; CHECK-P9: lfd [[REGLD5:[0-9]+]],
+; CHECK-P9: stfs [[REGLD5]],
 }
 
 ; Function Attrs: nounwind
@@ -134,4 +141,7 @@ entry:
 ; CHECK-LABEL: @floatToDbl
 ; CHECK: lxsspx [[REGLD5:[0-9]+]],
 ; CHECK: stxsdx [[REGLD5]],
+; CHECK-P9-LABEL: @floatToDbl
+; CHECK-P9: lfs [[REGLD5:[0-9]+]],
+; CHECK-P9: stfd [[REGLD5]],
 }
-- 
2.50.1