def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
(v2f64 (XXPERMDIs
(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
- }
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), ixaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ ixaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+ } // IsLittleEndian, HasP9Vector
let Predicates = [IsBigEndian, HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
(v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
(v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
- }
+ // Store one 64-bit element of a vector straight from the vector register.
+ // With these big-endian patterns element 0 is the doubleword that sub_64
+ // extracts, so it is stored as-is; element 1 is first swapped into that
+ // position with XXPERMDI $A, $A, 2.
+ // Indexed (xaddr) addresses select XFSTOREf64.
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+ // Displacement (ixaddr) addresses select DFSTOREf64; the element handling
+ // is the same as for the indexed forms above.
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), ixaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), ixaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+ } // IsBigEndian, HasP9Vector
}
let Predicates = [IsBigEndian, HasP8Vector] in {
def : Pat<DWToSPExtractConv.BVS,
(v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
(XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ // Store a single 32-bit vector element with STIWX instead of moving it
+ // through a GPR or converting it to a scalar first. Element 1 is already
+ // the word that the store writes, so it needs no shift.
+ // These patterns are gated on P8 (not P9): they only use XXSLDWI and STIWX,
+ // and the P8 big-endian test expectations (stfiwx/stxsiwx) depend on them.
+ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+ // Elements in a register on a BE system are in order <0, 1, 2, 3>.
+ // The store instructions store the second word from the left.
+ // So to align element zero, we need to modulo-left-shift by 3 words.
+ // Similar logic applies for elements 2 and 3.
+ // Each Idx entry is [element index, XXSLDWI shift amount in words].
+ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ }
}
+ // 64-bit element stores for big-endian P8 targets without P9 vector support.
+ // Only the indexed (xoaddr) form is matched here, selecting XFSTOREf64.
+ let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+ // Element 0 is the doubleword that sub_64 extracts: store it directly.
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ // Element 1 must be swapped into that doubleword first (XXPERMDI ..., 2).
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ } // HasP8Vector, IsBigEndian, NoP9Vector
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
def : Pat<DWToSPExtractConv.BVS,
(v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
(XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+ // Elements in a register on a LE system are in order <3, 2, 1, 0>.
+ // The store instructions store the second word from the left.
+ // So to align element 3, we need to modulo-left-shift by 3 words.
+ // Similar logic applies for elements 0 and 1.
+ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ }
}
+ // 64-bit element stores for little-endian P8 targets without P9 vector
+ // support. Only the indexed (xoaddr) form is matched here, selecting
+ // XFSTOREf64. The element handling mirrors the big-endian block: here it is
+ // element 0 that needs the swap and element 1 that is stored directly.
+ let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+ // Element 0 must be swapped into the sub_64 doubleword (XXPERMDI ..., 2).
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ // Element 1 is already the doubleword that sub_64 extracts.
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ } // HasP8Vector, IsLittleEndian, NoP9Vector
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unkknown-unknown \
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs -O2 < %s | FileCheck %s \
; RUN: --check-prefix=CHECK-P9
-; Function Attrs: norecurse nounwind writeonly
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unkknown-unknown \
+; RUN: -ppc-asm-full-reg-names -verify-machineinstrs -O2 < %s | FileCheck %s \
+; RUN: --check-prefix=CHECK-P9-BE
+
+; Stores element 0 of a <2 x i64> to %ap[%Idx]. The index is only known at
+; run time, so every configuration is expected to use an indexed
+; (register + register) store. The little-endian runs swap the doublewords
+; first; the big-endian runs store the vector register directly.
+define <2 x i64> @testllv(<2 x i64> returned %a, <2 x i64> %b, i64* nocapture %ap, i64 %Idx) local_unnamed_addr #0 {
+; CHECK-LABEL: testllv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxswapd vs0, vs34
+; CHECK-NEXT: sldi r3, r8, 3
+; CHECK-NEXT: stfdx f0, r7, r3
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testllv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r8, 3
+; CHECK-BE-NEXT: stxsdx vs34, r7, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testllv:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xxswapd vs0, vs34
+; CHECK-P9-NEXT: sldi r3, r8, 3
+; CHECK-P9-NEXT: stfdx f0, r7, r3
+; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testllv:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: sldi r3, r8, 3
+; CHECK-P9-BE-NEXT: stxsdx vs34, r7, r3
+; CHECK-P9-BE-NEXT: blr
+entry:
+ %vecext = extractelement <2 x i64> %a, i32 0
+ %arrayidx = getelementptr inbounds i64, i64* %ap, i64 %Idx
+ store i64 %vecext, i64* %arrayidx, align 8
+ ret <2 x i64> %a
+}
+
define <2 x i64> @testll0(<2 x i64> returned %a, <2 x i64> %b, i64* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testll0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxswapd vs0, vs34
-; CHECK-NEXT: mfvsrd r3, f0
-; CHECK-NEXT: std r3, 24(r7)
+; CHECK-NEXT: stfd f0, 24(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testll0:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mfvsrd r3, vs34
-; CHECK-BE-NEXT: std r3, 24(r7)
+; CHECK-BE-NEXT: addi r3, r7, 24
+; CHECK-BE-NEXT: stxsdx vs34, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testll0:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mfvsrld r3, vs34
-; CHECK-P9-NEXT: std r3, 24(r7)
+; CHECK-P9-NEXT: xxswapd vs0, vs34
+; CHECK-P9-NEXT: stfd f0, 24(r7)
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testll0:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: stxsd v2, 24(r7)
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <2 x i64> %a, i32 0
%arrayidx = getelementptr inbounds i64, i64* %ap, i64 3
define <2 x i64> @testll1(<2 x i64> returned %a, i64 %b, i64* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testll1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mfvsrd r3, vs34
-; CHECK-NEXT: std r3, 24(r6)
+; CHECK-NEXT: addi r3, r6, 24
+; CHECK-NEXT: stxsdx vs34, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testll1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxswapd vs0, vs34
-; CHECK-BE-NEXT: mfvsrd r3, f0
-; CHECK-BE-NEXT: std r3, 24(r6)
+; CHECK-BE-NEXT: stfd f0, 24(r6)
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testll1:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mfvsrd r3, vs34
-; CHECK-P9-NEXT: std r3, 24(r6)
+; CHECK-P9-NEXT: stxsd v2, 24(r6)
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testll1:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxswapd vs0, vs34
+; CHECK-P9-BE-NEXT: stfd f0, 24(r6)
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <2 x i64> %a, i32 1
%arrayidx = getelementptr inbounds i64, i64* %ap, i64 3
ret <2 x i64> %a
}
-; Function Attrs: norecurse nounwind writeonly
+; Stores element 0 of a <2 x double> to %ap[%Idx]. The index is only known
+; at run time, so every configuration is expected to use an indexed
+; (register + register) store. The little-endian runs swap the doublewords
+; first; the big-endian runs store the vector register directly.
+define <2 x double> @testdv(<2 x double> returned %a, <2 x double> %b, double* nocapture %ap, i64 %Idx) local_unnamed_addr #0 {
+; CHECK-LABEL: testdv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxswapd vs0, vs34
+; CHECK-NEXT: sldi r3, r8, 3
+; CHECK-NEXT: stfdx f0, r7, r3
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testdv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r8, 3
+; CHECK-BE-NEXT: stxsdx vs34, r7, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testdv:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xxswapd vs0, vs34
+; CHECK-P9-NEXT: sldi r3, r8, 3
+; CHECK-P9-NEXT: stfdx f0, r7, r3
+; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testdv:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: sldi r3, r8, 3
+; CHECK-P9-BE-NEXT: stxsdx vs34, r7, r3
+; CHECK-P9-BE-NEXT: blr
+entry:
+ %vecext = extractelement <2 x double> %a, i32 0
+ %arrayidx = getelementptr inbounds double, double* %ap, i64 %Idx
+ store double %vecext, double* %arrayidx, align 8
+ ret <2 x double> %a
+}
+
define <2 x double> @testd0(<2 x double> returned %a, <2 x double> %b, double* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testd0:
; CHECK: # %bb.0: # %entry
; CHECK-P9-NEXT: xxswapd vs0, vs34
; CHECK-P9-NEXT: stfd f0, 24(r7)
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testd0:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: stxsd v2, 24(r7)
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <2 x double> %a, i32 0
%arrayidx = getelementptr inbounds double, double* %ap, i64 3
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: stxsd v2, 24(r7)
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testd1:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxswapd vs0, vs34
+; CHECK-P9-BE-NEXT: stfd f0, 24(r7)
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <2 x double> %a, i32 1
%arrayidx = getelementptr inbounds double, double* %ap, i64 3
define <4 x float> @testf0(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testf0:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT: xscvspdpn f0, vs0
-; CHECK-NEXT: stfs f0, 12(r7)
+; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testf0:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xscvspdpn f0, vs34
-; CHECK-BE-NEXT: stfs f0, 12(r7)
+; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stfiwx f0, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testf0:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: stfs f0, 12(r7)
+; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stfiwx f0, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testf0:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 0
%arrayidx = getelementptr inbounds float, float* %ap, i64 3
define <4 x float> @testf1(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testf1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxswapd vs0, vs34
-; CHECK-NEXT: xscvspdpn f0, vs0
-; CHECK-NEXT: stfs f0, 12(r7)
+; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testf1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: stfs f0, 12(r7)
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stxsiwx vs34, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testf1:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xxswapd vs0, vs34
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: stfs f0, 12(r7)
+; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stfiwx f0, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testf1:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 1
%arrayidx = getelementptr inbounds float, float* %ap, i64 3
define <4 x float> @testf2(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1
-; CHECK-NEXT: xscvspdpn f0, vs0
-; CHECK-NEXT: stfs f0, 12(r7)
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stxsiwx vs34, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testf2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxswapd vs0, vs34
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: stfs f0, 12(r7)
+; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stfiwx f0, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testf2:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: stfs f0, 12(r7)
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stxsiwx vs34, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testf2:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 2
%arrayidx = getelementptr inbounds float, float* %ap, i64 3
define <4 x float> @testf3(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testf3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xscvspdpn f0, vs34
-; CHECK-NEXT: stfs f0, 12(r7)
+; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testf3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: stfs f0, 12(r7)
+; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stfiwx f0, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testf3:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: xscvspdpn f0, vs34
-; CHECK-P9-NEXT: stfs f0, 12(r7)
+; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stfiwx f0, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testf3:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %a, i32 3
%arrayidx = getelementptr inbounds float, float* %ap, i64 3
define <4 x i32> @testi0(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testi0:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxswapd vs0, vs34
-; CHECK-NEXT: mfvsrwz r3, f0
-; CHECK-NEXT: stw r3, 12(r7)
+; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testi0:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 3
-; CHECK-BE-NEXT: mfvsrwz r3, f0
-; CHECK-BE-NEXT: stw r3, 12(r7)
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stfiwx f0, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testi0:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: vextuwrx r3, r3, v2
-; CHECK-P9-NEXT: stw r3, 12(r7)
+; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stfiwx f0, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testi0:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x i32> %a, i32 0
%arrayidx = getelementptr inbounds i32, i32* %ap, i64 3
; CHECK-LABEL: testi1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 1
-; CHECK-NEXT: mfvsrwz r3, f0
-; CHECK-NEXT: stw r3, 12(r7)
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testi1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mfvsrwz r3, vs34
-; CHECK-BE-NEXT: stw r3, 12(r7)
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stxsiwx vs34, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testi1:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: li r3, 4
-; CHECK-P9-NEXT: vextuwrx r3, r3, v2
-; CHECK-P9-NEXT: stw r3, 12(r7)
+; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stfiwx f0, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testi1:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stxsiwx vs34, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x i32> %a, i32 1
%arrayidx = getelementptr inbounds i32, i32* %ap, i64 3
define <4 x i32> @testi2(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 {
; CHECK-LABEL: testi2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mfvsrwz r3, vs34
-; CHECK-NEXT: stw r3, 12(r7)
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stxsiwx vs34, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testi2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
-; CHECK-BE-NEXT: mfvsrwz r3, f0
-; CHECK-BE-NEXT: stw r3, 12(r7)
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stfiwx f0, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testi2:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mfvsrwz r3, vs34
-; CHECK-P9-NEXT: stw r3, 12(r7)
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stxsiwx vs34, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testi2:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x i32> %a, i32 2
%arrayidx = getelementptr inbounds i32, i32* %ap, i64 3
; CHECK-LABEL: testi3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT: mfvsrwz r3, f0
-; CHECK-NEXT: stw r3, 12(r7)
+; CHECK-NEXT: addi r3, r7, 12
+; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testi3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxswapd vs0, vs34
-; CHECK-BE-NEXT: mfvsrwz r3, f0
-; CHECK-BE-NEXT: stw r3, 12(r7)
+; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT: addi r3, r7, 12
+; CHECK-BE-NEXT: stfiwx f0, 0, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testi3:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: li r3, 12
-; CHECK-P9-NEXT: vextuwrx r3, r3, v2
-; CHECK-P9-NEXT: stw r3, 12(r7)
+; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT: addi r3, r7, 12
+; CHECK-P9-NEXT: stfiwx f0, 0, r3
; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-BE-LABEL: testi3:
+; CHECK-P9-BE: # %bb.0: # %entry
+; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-BE-NEXT: addi r3, r7, 12
+; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3
+; CHECK-P9-BE-NEXT: blr
entry:
%vecext = extractelement <4 x i32> %a, i32 3
%arrayidx = getelementptr inbounds i32, i32* %ap, i64 3
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
; P9LE-NEXT: xvaddsp vs0, vs0, vs1
-; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
-; P9LE-NEXT: xscvspdpn f0, vs0
-; P9LE-NEXT: stfs f0, 0(r5)
+; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2
+; P9LE-NEXT: stfiwx f0, 0, r5
; P9LE-NEXT: blr
-
+;
; P9BE-LABEL: test_liwzx1:
; P9BE: # %bb.0:
; P9BE-NEXT: lfiwzx f0, 0, r3
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
; P9BE-NEXT: xvaddsp vs0, vs0, vs1
-; P9BE-NEXT: xscvspdpn f0, vs0
-; P9BE-NEXT: stfs f0, 0(r5)
+; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
+; P9BE-NEXT: stfiwx f0, 0, r5
; P9BE-NEXT: blr
-
+;
; P8LE-LABEL: test_liwzx1:
; P8LE: # %bb.0:
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
; P8LE-NEXT: xvaddsp vs0, vs0, vs1
-; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
-; P8LE-NEXT: xscvspdpn f0, vs0
-; P8LE-NEXT: stfsx f0, 0, r5
+; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2
+; P8LE-NEXT: stfiwx f0, 0, r5
; P8LE-NEXT: blr
-
+;
; P8BE-LABEL: test_liwzx1:
; P8BE: # %bb.0:
; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
; P8BE-NEXT: xvaddsp vs0, vs0, vs1
-; P8BE-NEXT: xscvspdpn f0, vs0
-; P8BE-NEXT: stfsx f0, 0, r5
+; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3
+; P8BE-NEXT: stfiwx f0, 0, r5
; P8BE-NEXT: blr
+
+
+
%a = load <1 x float>, <1 x float>* %A
%b = load <1 x float>, <1 x float>* %B
%X = fadd <1 x float> %a, %b
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
; P9LE-NEXT: xvsubsp vs0, vs0, vs1
-; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
-; P9LE-NEXT: xscvspdpn f0, vs0
+; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2
; P9LE-NEXT: mr r3, r5
-; P9LE-NEXT: stfs f0, 0(r5)
+; P9LE-NEXT: stfiwx f0, 0, r5
; P9LE-NEXT: blr
-
+;
; P9BE-LABEL: test_liwzx2:
; P9BE: # %bb.0:
; P9BE-NEXT: lfiwzx f0, 0, r3
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
; P9BE-NEXT: xvsubsp vs0, vs0, vs1
-; P9BE-NEXT: xscvspdpn f0, vs0
+; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P9BE-NEXT: mr r3, r5
-; P9BE-NEXT: stfs f0, 0(r5)
+; P9BE-NEXT: stfiwx f0, 0, r5
; P9BE-NEXT: blr
-
+;
; P8LE-LABEL: test_liwzx2:
; P8LE: # %bb.0:
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
; P8LE-NEXT: xvsubsp vs0, vs0, vs1
-; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
-; P8LE-NEXT: xscvspdpn f0, vs0
-; P8LE-NEXT: stfsx f0, 0, r5
+; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2
+; P8LE-NEXT: stfiwx f0, 0, r5
; P8LE-NEXT: blr
-
+;
; P8BE-LABEL: test_liwzx2:
; P8BE: # %bb.0:
; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
; P8BE-NEXT: xvsubsp vs0, vs0, vs1
-; P8BE-NEXT: xscvspdpn f0, vs0
-; P8BE-NEXT: stfsx f0, 0, r5
+; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3
+; P8BE-NEXT: stfiwx f0, 0, r5
; P8BE-NEXT: blr
+
+
+
+
+
%a = load <1 x float>, <1 x float>* %A
%b = load <1 x float>, <1 x float>* %B
%X = fsub <1 x float> %a, %b