bool isVSFRCRegister(unsigned Register) const {
return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
}
+ bool isVSSRCRegister(unsigned Register) const {
+ return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
+ }
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg);
bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// If this is a potential VSX load with an offset of 0, a VSX indexed load can
// be used.
+ bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
- if (IsVSFRC && (Opc == PPC::LFD) &&
+ bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
+ bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
+ if ((Is32VSXLoad || Is64VSXLoad) &&
(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
(Addr.Offset == 0)) {
UseOffset = false;
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
// VSX only provides an indexed load.
- if (IsVSFRC && Opc == PPC::LFD) return false;
+ if (Is32VSXLoad || Is64VSXLoad) return false;
MachineMemOperand *MMO =
FuncInfo.MF->getMachineMemOperand(
// Base reg with offset in range.
} else if (UseOffset) {
// VSX only provides an indexed load.
- if (IsVSFRC && Opc == PPC::LFD) return false;
+ if (Is32VSXLoad || Is64VSXLoad) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addReg(Addr.Base.Reg);
case PPC::LWA: Opc = PPC::LWAX; break;
case PPC::LWA_32: Opc = PPC::LWAX_32; break;
case PPC::LD: Opc = PPC::LDX; break;
- case PPC::LFS: Opc = PPC::LFSX; break;
+ case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
// If this is a potential VSX store with an offset of 0, a VSX indexed store
// can be used.
+ bool IsVSSRC = isVSSRCRegister(SrcReg);
bool IsVSFRC = isVSFRCRegister(SrcReg);
- if (IsVSFRC && (Opc == PPC::STFD) &&
- (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
+ bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
+ bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
+ if ((Is32VSXStore || Is64VSXStore) &&
+ (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
(Addr.Offset == 0)) {
UseOffset = false;
}
// into a RegBase.
if (Addr.BaseType == Address::FrameIndexBase) {
// VSX only provides an indexed store.
- if (IsVSFRC && Opc == PPC::STFD) return false;
+ if (Is32VSXStore || Is64VSXStore) return false;
MachineMemOperand *MMO =
FuncInfo.MF->getMachineMemOperand(
// Base reg with offset in range.
} else if (UseOffset) {
// VSX only provides an indexed store.
- if (IsVSFRC && Opc == PPC::STFD) return false;
+ if (Is32VSXStore || Is64VSXStore) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
case PPC::STH8: Opc = PPC::STHX8; break;
case PPC::STW8: Opc = PPC::STWX8; break;
case PPC::STD: Opc = PPC::STDX; break;
- case PPC::STFS: Opc = PPC::STFSX; break;
+ case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
}
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xssubsp $XT, $XA, $XB", IIC_VecFP,
[(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+
+ // FMA Instructions
+ let BaseName = "XSMADDASP" in {
+ let isCommutable = 1 in
+ def XSMADDASP : XX3Form<60, 1,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMADDMSP : XX3Form<60, 9,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSMSUBASP" in {
+ let isCommutable = 1 in
+ def XSMSUBASP : XX3Form<60, 17,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMSUBMSP : XX3Form<60, 25,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMADDASP" in {
+ let isCommutable = 1 in
+ def XSNMADDASP : XX3Form<60, 129,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMADDMSP : XX3Form<60, 137,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMSUBASP" in {
+ let isCommutable = 1 in
+ def XSNMSUBASP : XX3Form<60, 145,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMSUBMSP : XX3Form<60, 153,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
} // AddedComplexity = 400
} // HasP8Vector
; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
declare double @dummy1(double) #0
declare double @dummy2(double, double) #0
declare double @dummy3(double, double, double) #0
+declare float @dummy4(float, float) #0
define double @test_FMADD1(double %A, double %B, double %C) {
%D = fmul double %A, %B ; <double> [#uses=1]
; CHECK-VSX: fnmsubs
; CHECK-VSX-NEXT: blr
}
+
+define float @test_XSMADDMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fadd float %C, %D ; <float> [#uses=1]
+ ret float %E
+; CHECK-P8-LABEL: test_XSMADDMSP:
+; CHECK-P8: xsmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMSUBMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fsub float %D, %C ; <float> [#uses=1]
+ ret float %E
+; CHECK-P8-LABEL: test_XSMSUBMSP:
+; CHECK-P8: xsmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMADDASP(float %A, float %B, float %C, float %D) {
+ %E = fmul float %A, %B ; <float> [#uses=2]
+ %F = fadd float %E, %C ; <float> [#uses=1]
+ %G = fsub float %E, %D ; <float> [#uses=1]
+ %H = call float @dummy4(float %F, float %G) ; <float> [#uses=1]
+ ret float %H
+; CHECK-P8-LABEL: test_XSMADDASP:
+; CHECK-P8: xsmaddasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSMSUBASP(float %A, float %B, float %C, float %D) {
+ %E = fmul float %A, %B ; <float> [#uses=2]
+ %F = fsub float %E, %C ; <float> [#uses=1]
+ %G = fsub float %E, %D ; <float> [#uses=1]
+ %H = call float @dummy4(float %F, float %G) ; <float> [#uses=1]
+ ret float %H
+; CHECK-P8-LABEL: test_XSMSUBASP:
+; CHECK-P8: xsmsubasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSNMADDMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fadd float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8-LABEL: test_XSNMADDMSP:
+; CHECK-P8: xsnmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMSUBMSP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fsub float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBMSP:
+; CHECK-P8: xsnmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMADDASP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fadd float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ %H = call float @dummy4(float %E, float %F) ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8-LABEL: test_XSNMADDASP:
+; CHECK-P8: xsnmaddasp
+}
+
+define float @test_XSNMSUBASP(float %A, float %B, float %C) {
+ %D = fmul float %A, %B ; <float> [#uses=1]
+ %E = fsub float %D, %C ; <float> [#uses=1]
+ %F = fsub float -0.000000e+00, %E ; <float> [#uses=1]
+ %H = call float @dummy4(float %E, float %F) ; <float> [#uses=1]
+ ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBASP:
+; CHECK-P8: xsnmsubasp
+}
--- /dev/null
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
+define void @test1sp(float %a, float %b, float %c, float %e, float* nocapture %d) #0 {
+entry:
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx1, align 4
+ ret void
+
+; CHECK-LABEL: @test1sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 4
+; CHECK-DAG: stxsspx 3, 0, 7
+; CHECK-DAG: stxsspx 1, 7, [[C1]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test1sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 7
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 1, 7, [[C1]]
+; CHECK-FISL: blr
+}
+
+define void @test2sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx1, align 4
+ %2 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+ %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+ store float %2, float* %arrayidx2, align 4
+ ret void
+
+; CHECK-LABEL: @test2sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+; CHECK-DAG: stxsspx 3, 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test2sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+define void @test3sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 3
+ store float %2, float* %arrayidx1, align 4
+ %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+ %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+ store float %3, float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx3, align 4
+ ret void
+
+; CHECK-LABEL: @test3sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 12
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: li [[C3:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+
+; CHECK-DAG: xsmaddmsp 3, 2, 4
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 3, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test3sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: fmr 4, [[F1]]
+; CHECK-FISL-DAG: xsmaddasp 4, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL: blr
+}
+
+define void @test4sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+ %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+ store float %0, float* %d, align 4
+ %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+ %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+ store float %1, float* %arrayidx1, align 4
+ %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+ %arrayidx3 = getelementptr inbounds float, float* %d, i64 3
+ store float %2, float* %arrayidx3, align 4
+ %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+ %arrayidx4 = getelementptr inbounds float, float* %d, i64 2
+ store float %3, float* %arrayidx4, align 4
+ ret void
+
+; CHECK-LABEL: @test4sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 12
+; CHECK-DAG: xsmaddasp 4, 2, 3
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test4sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr [[F1]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+declare float @llvm.fma.f32(float, float, float) #0
# CHECK: xsmaddmdp 7, 63, 27
0xf0 0xff 0xd9 0x4c
+# CHECK: xsmaddasp 7, 63, 27
+0xf0 0xff 0xd8 0x0c
+
+# CHECK: xsmaddmsp 7, 63, 27
+0xf0 0xff 0xd8 0x4c
+
# CHECK: xsmaxdp 7, 63, 27
0xf0 0xff 0xdd 0x04
# CHECK: xsmsubmdp 7, 63, 27
0xf0 0xff 0xd9 0xcc
+# CHECK: xsmsubasp 7, 63, 27
+0xf0 0xff 0xd8 0x8c
+
+# CHECK: xsmsubmsp 7, 63, 27
+0xf0 0xff 0xd8 0xcc
+
# CHECK: xsmulsp 7, 63, 27
0xf0 0xff 0xd8 0x84
# CHECK: xsnmsubmdp 7, 63, 27
0xf0 0xff 0xdd 0xcc
+# CHECK: xsnmaddasp 7, 63, 27
+0xf0 0xff 0xdc 0x0c
+
+# CHECK: xsnmaddmsp 7, 63, 27
+0xf0 0xff 0xdc 0x4c
+
+# CHECK: xsnmsubasp 7, 63, 27
+0xf0 0xff 0xdc 0x8c
+
+# CHECK: xsnmsubmsp 7, 63, 27
+0xf0 0xff 0xdc 0xcc
+
# CHECK: xsrdpi 7, 27
0xf0 0xe0 0xd9 0x24
# CHECK-BE: xsmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x4c]
# CHECK-LE: xsmaddmdp 7, 63, 27 # encoding: [0x4c,0xd9,0xff,0xf0]
xsmaddmdp 7, 63, 27
+# CHECK-BE: xsmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x0c]
+# CHECK-LE: xsmaddasp 7, 63, 27 # encoding: [0x0c,0xd8,0xff,0xf0]
+ xsmaddasp 7, 63, 27
+# CHECK-BE: xsmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x4c]
+# CHECK-LE: xsmaddmsp 7, 63, 27 # encoding: [0x4c,0xd8,0xff,0xf0]
+ xsmaddmsp 7, 63, 27
# CHECK-BE: xsmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x04]
# CHECK-LE: xsmaxdp 7, 63, 27 # encoding: [0x04,0xdd,0xff,0xf0]
xsmaxdp 7, 63, 27
# CHECK-BE: xsmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xcc]
# CHECK-LE: xsmsubmdp 7, 63, 27 # encoding: [0xcc,0xd9,0xff,0xf0]
xsmsubmdp 7, 63, 27
+# CHECK-BE: xsmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x8c]
+# CHECK-LE: xsmsubasp 7, 63, 27 # encoding: [0x8c,0xd8,0xff,0xf0]
+ xsmsubasp 7, 63, 27
+# CHECK-BE: xsmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0xcc]
+# CHECK-LE: xsmsubmsp 7, 63, 27 # encoding: [0xcc,0xd8,0xff,0xf0]
+ xsmsubmsp 7, 63, 27
# CHECK-BE: xsmulsp 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x84]
# CHECK-LE: xsmulsp 7, 63, 27 # encoding: [0x84,0xd8,0xff,0xf0]
xsmulsp 7, 63, 27
# CHECK-BE: xsnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0xcc]
# CHECK-LE: xsnmsubmdp 7, 63, 27 # encoding: [0xcc,0xdd,0xff,0xf0]
xsnmsubmdp 7, 63, 27
+# CHECK-BE: xsnmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x0c]
+# CHECK-LE: xsnmaddasp 7, 63, 27 # encoding: [0x0c,0xdc,0xff,0xf0]
+ xsnmaddasp 7, 63, 27
+# CHECK-BE: xsnmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x4c]
+# CHECK-LE: xsnmaddmsp 7, 63, 27 # encoding: [0x4c,0xdc,0xff,0xf0]
+ xsnmaddmsp 7, 63, 27
+# CHECK-BE: xsnmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x8c]
+# CHECK-LE: xsnmsubasp 7, 63, 27 # encoding: [0x8c,0xdc,0xff,0xf0]
+ xsnmsubasp 7, 63, 27
+# CHECK-BE: xsnmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0xcc]
+# CHECK-LE: xsnmsubmsp 7, 63, 27 # encoding: [0xcc,0xdc,0xff,0xf0]
+ xsnmsubmsp 7, 63, 27
# CHECK-BE: xsrdpi 7, 27 # encoding: [0xf0,0xe0,0xd9,0x24]
# CHECK-LE: xsrdpi 7, 27 # encoding: [0x24,0xd9,0xe0,0xf0]
xsrdpi 7, 27