// A dispatch group is 6 instructions.
let LoopMicroOpBufferSize = 60;
+ // As iops are dispatched to a slice, they are held in an independent slice
+ // issue queue until all register sources and other dependencies have been
+ // resolved and they can be issued. Each of four execution slices has an
+ // 11-entry iop issue queue.
+ let MicroOpBufferSize = 44;
+
let CompleteModel = 1;
// Do not support QPX (Quad Processing eXtension) or SPE (Signal Processing
; CHECK-LABEL: test2
; CHECK: addi 3, 3, 8
; CHECK: lxvx [[LD:[0-9]+]], 0, 3
-; CHECK: addi 3, 4, 4
-; CHECK: stxvx [[LD]], 0, 3
+; CHECK: addi [[REG:[0-9]+]], 4, 4
+; CHECK: stxvx [[LD]], 0, [[REG]]
}
; CHECK-P9: xxpermdi vs0, f0, f0, 2
; CHECK-P9: xxspltw vs0, vs0, 3
; CHECK-P9: stxvx vs0, 0, r4
-; CHECK-P9: lis r4, 1024
; CHECK-P9: lfiwax f0, 0, r3
; CHECK-P9: addis r3, r2, .LC1@toc@ha
; CHECK-P9: ld r3, .LC1@toc@l(r3)
; CHECK-P9: xscvsxdsp f0, f0
; CHECK-P9: ld r3, 0(r3)
+; CHECK-P9: lis r4, 1024
; CHECK-P9: stfsx f0, r3, r4
; CHECK-P9: blr
entry:
; P9LE-LABEL: fromRegsConvftoi
; P8BE-LABEL: fromRegsConvftoi
; P8LE-LABEL: fromRegsConvftoi
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
; P9LE-LABEL: fromRegsConvdtoi
; P8BE-LABEL: fromRegsConvdtoi
; P8LE-LABEL: fromRegsConvdtoi
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
-; P9LE: xxmrghd
; P9LE: xvcvdpsxws
+; P9LE: xxmrghd
; P9LE: xvcvdpsxws
; P9LE: vmrgew v2
; P8BE: lfdx
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
-; P9LE: xxmrghd
; P9LE: xvcvdpsxws
+; P9LE: xxmrghd
; P9LE: xvcvdpsxws
; P9LE: vmrgew v2
; P8BE: lfdux
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
-; P9LE: xxmrghd
; P9LE: xvcvdpsxws
+; P9LE: xxmrghd
; P9LE: xvcvdpsxws
; P9LE: vmrgew v2
; P8BE: lfdux
; P9LE-LABEL: fromRegsConvftoui
; P8BE-LABEL: fromRegsConvftoui
; P8LE-LABEL: fromRegsConvftoui
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
; P9LE-LABEL: fromRegsConvdtoui
; P8BE-LABEL: fromRegsConvdtoui
; P8LE-LABEL: fromRegsConvdtoui
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
-; P9LE: xxmrghd
; P9LE: xvcvdpuxws
+; P9LE: xxmrghd
; P9LE: xvcvdpuxws
; P9LE: vmrgew v2
; P8BE: lfdx
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
-; P9LE: xxmrghd
; P9LE: xvcvdpuxws
+; P9LE: xxmrghd
; P9LE: xvcvdpuxws
; P9LE: vmrgew v2
; P8BE: lfdux
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
-; P9LE: xxmrghd
; P9LE: xvcvdpuxws
+; P9LE: xxmrghd
; P9LE: xvcvdpuxws
; P9LE: vmrgew v2
; P8BE: lfdux
%2 = call fp128 @llvm.ppc.scalar.insert.exp.qp(fp128 %0, i64 %1)
ret fp128 %2
; CHECK-LABEL: insert_exp_qp
-; CHECK: mtvsrd [[FPREG:f[0-9]+]], r3
-; CHECK: lxvx [[VECREG:v[0-9]+]]
+; CHECK-DAG: mtvsrd [[FPREG:f[0-9]+]], r3
+; CHECK-DAG: lxvx [[VECREG:v[0-9]+]]
; CHECK: xsiexpqp v2, [[VECREG]], [[FPREG]]
; CHECK: blr
}
align 16 %a) {
; CHECK-LABEL: testStruct_03:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 128(r1)
; CHECK-NEXT: std r10, 88(r1)
; CHECK-NEXT: std r9, 80(r1)
; CHECK-NEXT: std r8, 72(r1)
; CHECK-NEXT: std r5, 48(r1)
; CHECK-NEXT: std r4, 40(r1)
; CHECK-NEXT: std r3, 32(r1)
+; CHECK-NEXT: lxv v2, 128(r1)
; CHECK-NEXT: blr
; CHECK-BE-LABEL: testStruct_03:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 144(r1)
; CHECK-BE-NEXT: std r10, 104(r1)
; CHECK-BE-NEXT: std r9, 96(r1)
; CHECK-BE-NEXT: std r8, 88(r1)
; CHECK-BE-NEXT: std r5, 64(r1)
; CHECK-BE-NEXT: std r4, 56(r1)
; CHECK-BE-NEXT: std r3, 48(r1)
+; CHECK-BE-NEXT: lxv v2, 144(r1)
; CHECK-BE-NEXT: blr
entry:
%a7 = getelementptr inbounds %struct.With9fp128params,
define fp128 @testMixedAggregate_03([4 x i128] %sa.coerce) {
; CHECK-LABEL: testMixedAggregate_03:
; CHECK: # %bb.0: # %entry
-; CHECK-DAG: mtvsrwa v2, r3
-; CHECK-DAG: mtvsrdd v3, r6, r5
-; CHECK: mtvsrd v4, r10
+; CHECK: mtvsrwa v2, r3
; CHECK: xscvsdqp v2, v2
-; CHECK-DAG: xscvsdqp v[[REG:[0-9]+]], v4
-; CHECK-DAG: xsaddqp v2, v3, v2
+; CHECK: mtvsrdd v3, r6, r5
+; CHECK: xsaddqp v2, v3, v2
+; CHECK: mtvsrd v[[REG1:[0-9]+]], r10
+; CHECK: xscvsdqp v[[REG:[0-9]+]], v[[REG1]]
; CHECK: xsaddqp v2, v2, v[[REG]]
; CHECK-NEXT: blr
entry:
; CHECK-NEXT: std r7, 64(r1)
; CHECK-NEXT: std r10, 88(r1)
; CHECK-NEXT: std r9, 80(r1)
-; CHECK-NEXT: lxv v2, 64(r1)
; CHECK-NEXT: std r6, 56(r1)
; CHECK-NEXT: std r5, 48(r1)
; CHECK-NEXT: std r4, 40(r1)
; CHECK-NEXT: std r3, 32(r1)
+; CHECK-NEXT: lxv v2, 64(r1)
; CHECK-NEXT: blr
; CHECK-BE-LABEL: testNestedAggregate:
; CHECK-BE-NEXT: std r7, 80(r1)
; CHECK-BE-NEXT: std r10, 104(r1)
; CHECK-BE-NEXT: std r9, 96(r1)
-; CHECK-BE-NEXT: lxv v2, 80(r1)
; CHECK-BE-NEXT: std r6, 72(r1)
; CHECK-BE-NEXT: std r5, 64(r1)
; CHECK-BE-NEXT: std r4, 56(r1)
; CHECK-BE-NEXT: std r3, 48(r1)
+; CHECK-BE-NEXT: lxv v2, 80(r1)
; CHECK-BE-NEXT: blr
entry:
%c = getelementptr inbounds %struct.MixedC, %struct.MixedC* %a, i64 0, i32 1, i32 1
define fp128 @sum_float128(i32 signext %count, ...) {
; CHECK-LABEL: sum_float128:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis r11, r2, .LCPI17_0@toc@ha
-; CHECK-NEXT: cmpwi cr0, r3, 1
; CHECK-NEXT: std r10, 88(r1)
; CHECK-NEXT: std r9, 80(r1)
; CHECK-NEXT: std r8, 72(r1)
; CHECK-NEXT: std r7, 64(r1)
; CHECK-NEXT: std r6, 56(r1)
-; CHECK-NEXT: std r5, 48(r1)
+; CHECK-NEXT: cmpwi cr0, r3, 1
; CHECK-NEXT: std r4, 40(r1)
-; CHECK-NEXT: addi r11, r11, .LCPI17_0@toc@l
-; CHECK-NEXT: lxvx v2, 0, r11
+; CHECK-NEXT: addis [[REG:r[0-9]+]], r2, .LCPI17_0@toc@ha
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG]], .LCPI17_0@toc@l
+; CHECK-NEXT: lxvx v2, 0, [[REG1]]
+; CHECK-NEXT: std r5, 48(r1)
; CHECK-NEXT: bltlr cr0
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: addi r3, r1, 40
; CHECK-NEXT: lxvx v3, 0, r3
; CHECK-NEXT: xsaddqp v2, v3, v2
+; CHECK-NEXT: addi [[REG2:r[0-9]+]], r1, 72
+; CHECK-NEXT: std [[REG2]], -8(r1)
; CHECK-NEXT: lxv v3, 16(r3)
-; CHECK-NEXT: addi r3, r1, 72
-; CHECK-NEXT: std r3, -8(r1)
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: blr
entry:
fp128* nocapture %res) {
; CHECK-LABEL: qp_powi:
; CHECK: lxv v2, 0(r3)
-; CHECK: lwz r3, 0(r4)
+; CHECK: lwz r5, 0(r4)
; CHECK: bl __powikf2
; CHECK: blr
entry:
; CHECK-LABEL: qpConv2dp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r5, r2, .LC7@toc@ha
-; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: ld r5, .LC7@toc@l(r5)
; CHECK-NEXT: lxvx v2, 0, r5
; CHECK-NEXT: xscvqpdp v2, v2
+; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: stxsdx v2, r3, r4
; CHECK-NEXT: blr
entry:
; CHECK-LABEL: qpConv2sp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r5, r2, .LC7@toc@ha
-; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: ld r5, .LC7@toc@l(r5)
; CHECK-NEXT: lxv v2, 48(r5)
; CHECK-NEXT: xscvqpdpo v2, v2
; CHECK-NEXT: xsrsp f0, v2
+; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: stfsx f0, r3, r4
; CHECK-NEXT: blr
entry:
; CHECK-LABEL: dpConv2qp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscpsgndp v2, f1, f1
-; CHECK-NEXT: sldi r4, r4, 4
-; CHECK-NEXT: xscvdpqp v2, v2
+; CHECK-DAG: sldi r4, r4, 4
+; CHECK-DAG: xscvdpqp v2, v2
; CHECK-NEXT: stxvx v2, r3, r4
; CHECK-NEXT: blr
entry:
; CHECK-LABEL: spConv2qp_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscpsgndp v2, f1, f1
-; CHECK-NEXT: sldi r4, r4, 4
-; CHECK-NEXT: xscvdpqp v2, v2
+; CHECK-DAG: sldi r4, r4, 4
+; CHECK-DAG: xscvdpqp v2, v2
; CHECK-NEXT: stxvx v2, r3, r4
; CHECK-NEXT: blr
entry:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 4
; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: add r4, r3, r4
-; CHECK-NEXT: lxv v3, -16(r4)
+; CHECK-NEXT: add [[REG:r[0-9]+]], r3, r4
+; CHECK-NEXT: lxv v3, -16([[REG]])
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: blr
i32 signext %loopcnt, fp128* nocapture readnone %sum) {
; CHECK-LABEL: maxVecParam:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xsaddqp v2, v2, v3
-; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1)
; CHECK-NEXT: xsaddqp v2, v2, v4
; CHECK-NEXT: xsaddqp v2, v2, v5
; CHECK-NEXT: xsaddqp v2, v2, v6
; CHECK-NEXT: xsaddqp v2, v2, v11
; CHECK-NEXT: xsaddqp v2, v2, v12
; CHECK-NEXT: xsaddqp v2, v2, v13
+; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1)
; CHECK-NEXT: xssubqp v2, v2, v[[REG0]]
; CHECK-NEXT: blr
fp128 %p6, fp128 %p7, fp128 %p8, fp128 %p9, fp128 %p10,
define fp128 @mixParam_01(fp128 %a, i32 signext %i, fp128 %b) {
; CHECK-LABEL: mixParam_01:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mtvsrwa v4, r5
-; CHECK-NEXT: xsaddqp v2, v2, v3
-; CHECK-NEXT: xscvsdqp v[[REG0:[0-9]+]], v4
+; CHECK-DAG: mtvsrwa [[REG1:v[0-9]+]], r5
+; CHECK-DAG: xsaddqp v2, v2, v3
+; CHECK-NEXT: xscvsdqp v[[REG0:[0-9]+]], [[REG1]]
; CHECK-NEXT: xsaddqp v2, v2, v[[REG0]]
; CHECK-NEXT: blr
entry:
define fastcc fp128 @mixParam_01f(fp128 %a, i32 signext %i, fp128 %b) {
; CHECK-LABEL: mixParam_01f:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mtvsrwa v[[REG0:[0-9]+]], r3
-; CHECK-NEXT: xsaddqp v2, v2, v3
+; CHECK-DAG: mtvsrwa v[[REG0:[0-9]+]], r3
+; CHECK-DAG: xsaddqp v2, v2, v3
; CHECK-NEXT: xscvsdqp v[[REG1:[0-9]+]], v[[REG0]]
; CHECK-NEXT: xsaddqp v2, v2, v[[REG1]]
; CHECK-NEXT: blr
define fp128 @mixParam_02(fp128 %p1, double %p2, i64* nocapture %p3,
; CHECK-LABEL: mixParam_02:
; CHECK: # %bb.0: # %entry
-; CHECK-DAG: lwz r3, 96(r1)
+; CHECK: lwz r3, 96(r1)
; CHECK: add r4, r7, r9
-; CHECK-NEXT: xscpsgndp v[[REG0:[0-9]+]], f1, f1
-; CHECK-DAG: add r4, r4, r10
+; CHECK: add r4, r4, r10
+; CHECK: add r3, r4, r3
+; CHECK: clrldi r3, r3, 32
+; CHECK: std r3, 0(r6)
+; CHECK: lxv v[[REG1:[0-9]+]], 0(r8)
+; CHECK: xscpsgndp v[[REG0:[0-9]+]], f1, f1
; CHECK: xscvdpqp v[[REG0]], v[[REG0]]
-; CHECK-NEXT: add r3, r4, r3
-; CHECK-NEXT: clrldi r3, r3, 32
-; CHECK-NEXT: std r3, 0(r6)
-; CHECK-NEXT: lxv v[[REG1:[0-9]+]], 0(r8)
-; CHECK-NEXT: xsaddqp v2, v[[REG1]], v2
-; CHECK-NEXT: xsaddqp v2, v2, v3
+; CHECK: xsaddqp v2, v[[REG1]], v2
+; CHECK: xsaddqp v2, v2, v3
; CHECK-NEXT: blr
i16 signext %p4, fp128* nocapture readonly %p5,
i32 signext %p6, i8 zeroext %p7, i32 zeroext %p8) {
; CHECK-LABEL: mixParam_02f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: add r4, r4, r6
-; CHECK-NEXT: xscpsgndp v[[REG0:[0-9]+]], f1, f1
; CHECK-NEXT: add r4, r4, r7
-; CHECK-NEXT: xscvdpqp v[[REG0]], v[[REG0]]
; CHECK-NEXT: add r4, r4, r8
; CHECK-NEXT: clrldi r4, r4, 32
-; CHECK-NEXT: std r4, 0(r3)
-; CHECK-NEXT: lxv v[[REG1:[0-9]+]], 0(r5)
+; CHECK-DAG: std r4, 0(r3)
+; CHECK-DAG: lxv v[[REG1:[0-9]+]], 0(r5)
+; CHECK-NEXT: xscpsgndp v[[REG0:[0-9]+]], f1, f1
+; CHECK-NEXT: xscvdpqp v[[REG0]], v[[REG0]]
; CHECK-NEXT: xsaddqp v2, v[[REG1]], v2
; CHECK-NEXT: xsaddqp v2, v2, v[[REG0]]
; CHECK-NEXT: blr
; CHECK-LABEL: mixParam_03:
; CHECK: # %bb.0: # %entry
; CHECK-DAG: ld r3, 104(r1)
-; CHECK-DAG: mtvsrwa v[[REG2:[0-9]+]], r10
; CHECK-DAG: stxv v2, 0(r9)
-; CHECK-DAG: xscvsdqp v[[REG1:[0-9]+]], v[[REG2]]
; CHECK: stxvx v3, 0, r3
-; CHECK-NEXT: lxv v2, 0(r9)
+; CHECK: mtvsrwa v[[REG2:[0-9]+]], r10
+; CHECK-DAG: xscvsdqp v[[REG1:[0-9]+]], v[[REG2]]
+; CHECK-DAG: lxv v2, 0(r9)
; CHECK-NEXT: xsaddqp v2, v2, v[[REG1]]
; CHECK-NEXT: xscvqpdp v2, v2
; CHECK-NEXT: stxsd v2, 0(r5)
define fastcc void @mixParam_03f(fp128 %f1, double* nocapture %d1, <4 x i32> %vec1,
; CHECK-LABEL: mixParam_03f:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mtvsrwa v[[REG0:[0-9]+]], r5
-; CHECK-NEXT: stxv v[[REG1:[0-9]+]], 0(r4)
-; CHECK-NEXT: stxv v[[REG2:[0-9]+]], 0(r7)
-; CHECK-NEXT: lxv v[[REG1]], 0(r4)
+; CHECK-DAG: mtvsrwa v[[REG0:[0-9]+]], r5
+; CHECK-DAG: stxv v[[REG1:[0-9]+]], 0(r4)
+; CHECK-DAG: stxv v[[REG2:[0-9]+]], 0(r7)
+; CHECK-DAG: lxv v[[REG1]], 0(r4)
; CHECK-NEXT: xscvsdqp v[[REG3:[0-9]+]], v[[REG0]]
; CHECK-NEXT: xsaddqp v[[REG4:[0-9]+]], v[[REG1]], v[[REG3]]
; CHECK-NEXT: xscvqpdp v2, v[[REG4]]
ret i64 %conv
; CHECK-LABEL: qpConv2sdw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0@toc@ha
-; CHECK-DAG: ld r[[REG0]], .LC0@toc@l(r[[REG0]])
-; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
+; CHECK: ld r[[REG0]], .LC0@toc@l(r[[REG0]])
+; CHECK: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
; CHECK: xsaddqp v[[REG]], v[[REG]], v[[REG1]]
; CHECK-NEXT: xscvqpsdz v[[CONV:[0-9]+]], v[[REG]]
; CHECK-NEXT: mfvsrd r3, v[[CONV]]
; CHECK-LABEL: qpConv2sdw_testXForm
; CHECK: xscvqpsdz v[[CONV:[0-9]+]],
-; CHECK-NEXT: stxsdx v[[CONV]], r3, r4
+; CHECK: stxsdx v[[CONV]], r3, r4
; CHECK-NEXT: blr
}
ret i64 %conv
; CHECK-LABEL: qpConv2udw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0@toc@ha
; CHECK-DAG: ld r[[REG0]], .LC0@toc@l(r[[REG0]])
; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
; CHECK: xsaddqp v[[REG]], v[[REG]], v[[REG1]]
; CHECK-NEXT: xscvqpudz v[[CONV:[0-9]+]], v[[REG]]
; CHECK-NEXT: mfvsrd r3, v[[CONV]]
; CHECK-LABEL: qpConv2udw_testXForm
; CHECK: xscvqpudz v[[CONV:[0-9]+]],
-; CHECK-NEXT: stxsdx v[[CONV]], r3, r4
+; CHECK: stxsdx v[[CONV]], r3, r4
; CHECK-NEXT: blr
}
ret i32 %conv
; CHECK-LABEL: qpConv2sw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0@toc@ha
; CHECK-DAG: ld r[[REG0]], .LC0@toc@l(r[[REG0]])
; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
; CHECK-NEXT: xsaddqp v[[ADD:[0-9]+]], v[[REG]], v[[REG1]]
; CHECK-NEXT: xscvqpswz v[[CONV:[0-9]+]], v[[ADD]]
; CHECK-NEXT: mfvsrwz r[[REG2:[0-9]+]], v[[CONV]]
ret i32 %conv
; CHECK-LABEL: qpConv2uw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0@toc@ha
; CHECK-DAG: ld r[[REG0]], .LC0@toc@l(r[[REG0]])
; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
; CHECK-NEXT: xsaddqp v[[ADD:[0-9]+]], v[[REG]], v[[REG1]]
; CHECK-NEXT: xscvqpuwz v[[CONV:[0-9]+]], v[[ADD]]
; CHECK-NEXT: mfvsrwz r3, v[[CONV]]
define signext i16 @qpConv2shw_03(fp128* nocapture readonly %a) {
; CHECK-LABEL: qpConv2shw_03:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: lxv v3, 16(r4)
+; CHECK-NEXT: addis [[REG:r[0-9]+]], r2, .LC0@toc@ha
+; CHECK-NEXT: ld [[REG1:r[0-9]+]], .LC0@toc@l([[REG]])
+; CHECK-NEXT: lxv v3, 16([[REG1]])
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
define zeroext i16 @qpConv2uhw_03(fp128* nocapture readonly %a) {
; CHECK-LABEL: qpConv2uhw_03:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: lxv v3, 16(r4)
+; CHECK-NEXT: addis [[REG:r[0-9]+]], r2, .LC0@toc@ha
+; CHECK-NEXT: ld [[REG1:r[0-9]+]], .LC0@toc@l([[REG]])
+; CHECK-NEXT: lxv v3, 16([[REG1]])
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
define signext i8 @qpConv2sb_03(fp128* nocapture readonly %a) {
; CHECK-LABEL: qpConv2sb_03:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: lxv v3, 16(r4)
+; CHECK-NEXT: addis [[REG:r[0-9]+]], r2, .LC0@toc@ha
+; CHECK-NEXT: ld [[REG1:r[0-9]+]], .LC0@toc@l([[REG]])
+; CHECK-NEXT: lxv v3, 16([[REG1]])
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
define zeroext i8 @qpConv2ub_03(fp128* nocapture readonly %a) {
; CHECK-LABEL: qpConv2ub_03:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: lxv v3, 16(r4)
+; CHECK-NEXT: addis [[REG:r[0-9]+]], r2, .LC0@toc@ha
+; CHECK-NEXT: ld [[REG1:r[0-9]+]], .LC0@toc@l([[REG]])
+; CHECK-NEXT: lxv v3, 16([[REG1]])
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
; CHECK: li [[REG4:[0-9]+]], 5
; CHECK: [[LAB:[a-z0-9A-Z_.]+]]:
; CHECK: ld [[REG2:[0-9]+]], a@toc@l([[REG1]])
-; CHECK: ld [[REG3:[0-9]+]], 0([[REG2]])
; CHECK: stw [[REG4]], 8([[REG2]])
+; CHECK: ld [[REG3:[0-9]+]], 0([[REG2]])
; CHECK: stw [[REG4]], 8([[REG3]])
; CHECK: std [[REG3]], 0([[REG3]])
; CHECK: bdnz [[LAB]]
; CHECK-LABEL: bn_mul_comba8:
; CHECK: mulhdu
; CHECK-NEXT: mulld
-; CHECK-NEXT: mulhdu
-; CHECK-NEXT: mulld
+; CHECK: mulhdu
+; CHECK: mulld
; CHECK-NEXT: mulhdu
define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 signext %i_stride_pix1, i8* nocapture readonly %pix2) {
; CHECK-LABEL: test_pre_inc_disable_1:
; CHECK: # %bb.0: # %entry
-; CHECK: addis r6, r2
-; CHECK: addis r7, r2,
; CHECK: lfd f0, 0(r5)
-; CHECK: xxlxor v4, v4, v4
-; CHECK: addi r5, r6,
-; CHECK: addi r6, r7,
+; CHECK: addis r5, r2
+; CHECK: addi r5, r5,
; CHECK: lxvx v2, 0, r5
-; CHECK: lxvx v3, 0, r6
+; CHECK: addis r5, r2,
+; CHECK: addi r5, r5,
+; CHECK: lxvx v4, 0, r5
; CHECK: xxpermdi v5, f0, f0, 2
-; CHECK-DAG: vperm v[[VR1:[0-9]+]], v4, v5, v2
-; CHECK-DAG: vperm v[[VR2:[0-9]+]], v5, v4, v3
+; CHECK: xxlxor v3, v3, v3
+; CHECK-DAG: vperm v[[VR1:[0-9]+]], v5, v3, v4
+; CHECK-DAG: vperm v[[VR2:[0-9]+]], v3, v5, v2
; CHECK-DAG: xvnegsp v[[VR3:[0-9]+]], v[[VR1]]
; CHECK-DAG: xvnegsp v[[VR4:[0-9]+]], v[[VR2]]
; CHECK: .LBB0_1: # %for.cond1.preheader
; CHECK: lfd f0, 0(r3)
; CHECK: xxpermdi v1, f0, f0, 2
-; CHECK: vperm v6, v1, v4, v3
-; CHECK: vperm v1, v4, v1, v2
+; CHECK: vperm v6, v3, v1, v2
+; CHECK: vperm v1, v1, v3, v4
; CHECK-DAG: xvnegsp v6, v6
; CHECK-DAG: xvnegsp v1, v1
; CHECK-DAG: vabsduw v1, v1, v[[VR3]]
; CHECK-DAG: vabsduw v6, v6, v[[VR4]]
-; CHECK: vadduwm v1, v6, v1
+; CHECK: vadduwm v1, v1, v6
; CHECK: xxswapd v6, v1
; CHECK: vadduwm v1, v1, v6
; CHECK: xxspltw v6, v1, 2
; CHECK: vadduwm v1, v1, v6
-; CHECK: vextuwrx r7, r6, v1
+; CHECK: vextuwrx r7, r5, v1
; CHECK: ldux r8, r3, r4
; CHECK: add r3, r3, r4
-; CHECK: add r5, r7, r5
+; CHECK: add r6, r7, r6
; CHECK: mtvsrd f0, r8
; CHECK: xxswapd v1, vs0
-; CHECK: vperm v6, v1, v4, v3
-; CHECK: vperm v1, v4, v1, v2
+; CHECK: vperm v6, v3, v1, v2
+; CHECK: vperm v1, v1, v3, v4
; CHECK-DAG: xvnegsp v6, v6
; CHECK-DAG: xvnegsp v1, v1
; CHECK-DAG: vabsduw v1, v1, v[[VR3]]
; CHECK-DAG: vabsduw v6, v6, v[[VR4]]
-; CHECK: vadduwm v1, v6, v1
+; CHECK: vadduwm v1, v1, v6
; CHECK: xxswapd v6, v1
; CHECK: vadduwm v1, v1, v6
; CHECK: xxspltw v6, v1, 2
; CHECK: vadduwm v1, v1, v6
-; CHECK: vextuwrx r8, r6, v1
-; CHECK: add r5, r8, r5
+; CHECK: vextuwrx r7, r5, v1
+; CHECK: add r6, r7, r6
; CHECK: bdnz .LBB0_1
-; CHECK: extsw r3, r5
+; CHECK: extsw r3, r6
; CHECK: blr
; P9BE-LABEL: test_pre_inc_disable_1:
-; P9BE: addis r6, r2,
-; P9BE: addis r7, r2,
; P9BE: lfd f0, 0(r5)
-; P9BE: xxlxor v4, v4, v4
-; P9BE: addi r5, r6,
-; P9BE: addi r6, r7,
+; P9BE: addis r5, r2,
+; P9BE: addi r5, r5,
; P9BE: lxvx v2, 0, r5
-; P9BE: lxvx v3, 0, r6
+; P9BE: addis r5, r2,
+; P9BE: addi r5, r5,
+; P9BE: lxvx v4, 0, r5
; P9BE: xxlor v5, vs0, vs0
-; P9BE: li r6, 0
-; P9BE-DAG: vperm v[[VR1:[0-9]+]], v4, v5, v2
-; P9BE-DAG: vperm v[[VR2:[0-9]+]], v4, v5, v3
+; P9BE: xxlxor v3, v3, v3
+; P9BE-DAG: li r5, 0
+; P9BE-DAG: vperm v[[VR1:[0-9]+]], v3, v5, v2
+; P9BE-DAG: vperm v[[VR2:[0-9]+]], v3, v5, v4
; P9BE-DAG: xvnegsp v[[VR3:[0-9]+]], v[[VR1]]
; P9BE-DAG: xvnegsp v[[VR4:[0-9]+]], v[[VR2]]
; P9BE: .LBB0_1: # %for.cond1.preheader
; P9BE: lfd f0, 0(r3)
; P9BE: xxlor v1, vs0, vs0
-; P9BE: vperm v6, v4, v1, v3
-; P9BE: vperm v1, v4, v1, v2
+; P9BE: vperm v6, v3, v1, v4
+; P9BE: vperm v1, v3, v1, v2
; P9BE-DAG: xvnegsp v6, v6
; P9BE-DAG: xvnegsp v1, v1
; P9BE-DAG: vabsduw v1, v1, v[[VR3]]
; P9BE: vadduwm v1, v1, v6
; P9BE: xxspltw v6, v1, 1
; P9BE: vadduwm v1, v1, v6
-; P9BE: vextuwlx r[[GR1:[0-9]+]], r6, v1
+; P9BE: vextuwlx r[[GR1:[0-9]+]], r5, v1
+; P9BE: add r6, r[[GR1]], r6
; P9BE: ldux r[[GR2:[0-9]+]], r3, r4
; P9BE: add r3, r3, r4
-; P9BE: add r5, r[[GR1]], r5
; P9BE: mtvsrd v1, r[[GR2]]
-; P9BE: vperm v6, v4, v1, v3
-; P9BE: vperm v1, v4, v1, v2
+; P9BE: vperm v6, v3, v1, v2
+; P9BE: vperm v1, v3, v1, v4
; P9BE-DAG: xvnegsp v6, v6
; P9BE-DAG: xvnegsp v1, v1
-; P9BE-DAG: vabsduw v1, v1, v[[VR3]]
-; P9BE-DAG: vabsduw v6, v6, v[[VR4]]
-; P9BE: vadduwm v1, v6, v1
+; P9BE-DAG: vabsduw v1, v1, v[[VR4]]
+; P9BE-DAG: vabsduw v6, v6, v[[VR3]]
+; P9BE: vadduwm v1, v1, v6
; P9BE: xxswapd v6, v1
; P9BE: vadduwm v1, v1, v6
; P9BE: xxspltw v6, v1, 1
; P9BE: vadduwm v1, v1, v6
-; P9BE: vextuwlx r8, r6, v1
-; P9BE: add r5, r8, r5
+; P9BE: vextuwlx r7, r5, v1
+; P9BE: add r6, r7, r6
; P9BE: bdnz .LBB0_1
-; P9BE: extsw r3, r5
+; P9BE: extsw r3, r6
; P9BE: blr
entry:
%idx.ext = sext i32 %i_stride_pix1 to i64
; Function Attrs: norecurse nounwind readonly
define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* nocapture readonly %pix2) {
; CHECK-LABEL: test_pre_inc_disable_2:
-; CHECK: addis r5, r2,
-; CHECK: addis r6, r2,
; CHECK: lfd f0, 0(r3)
-; CHECK: lfd f1, 0(r4)
-; CHECK: xxlxor v0, v0, v0
-; CHECK: addi r3, r5, .LCPI1_0@toc@l
-; CHECK: addi r4, r6, .LCPI1_1@toc@l
-; CHECK: lxvx v2, 0, r3
-; CHECK: lxvx v3, 0, r4
-; CHECK: xxpermdi v4, f0, f0, 2
-; CHECK: xxpermdi v5, f1, f1, 2
-; CHECK: vperm v1, v4, v0, v2
-; CHECK: vperm v4, v0, v4, v3
-; CHECK: vperm v2, v5, v0, v2
-; CHECK: vperm v3, v0, v5, v3
-; CHECK: vabsduw v3, v4, v3
-; CHECK: vabsduw v2, v1, v2
-; CHECK: vadduwm v2, v2, v3
+; CHECK: addis r3, r2,
+; CHECK: addi r3, r3, .LCPI1_0@toc@l
+; CHECK: lxvx v4, 0, r3
+; CHECK: addis r3, r2,
+; CHECK: xxpermdi v2, f0, f0, 2
+; CHECK: lfd f0, 0(r4)
+; CHECK: addi r3, r3, .LCPI1_1@toc@l
+; CHECK: xxlxor v3, v3, v3
+; CHECK: lxvx v0, 0, r3
+; CHECK: xxpermdi v1, f0, f0, 2
+; CHECK: vperm v5, v2, v3, v4
+; CHECK: vperm v2, v3, v2, v0
+; CHECK: vperm v0, v3, v1, v0
+; CHECK: vperm v3, v1, v3, v4
+; CHECK: vabsduw v2, v2, v0
+; CHECK: vabsduw v3, v5, v3
+; CHECK: vadduwm v2, v3, v2
; CHECK: xxswapd v3, v2
; CHECK: vadduwm v2, v2, v3
; CHECK: xxspltw v3, v2, 2
; CHECK: blr
; P9BE-LABEL: test_pre_inc_disable_2:
-; P9BE: addis r5, r2,
-; P9BE: addis r6, r2,
; P9BE: lfd f0, 0(r3)
-; P9BE: lfd f1, 0(r4)
-; P9BE: xxlxor v5, v5, v5
-; P9BE: addi r3, r5,
-; P9BE: addi r4, r6,
-; P9BE: lxvx v2, 0, r3
-; P9BE: lxvx v3, 0, r4
-; P9BE: xxlor v4, vs0, vs0
-; P9BE: xxlor v0, vs1, vs1
-; P9BE: vperm v1, v5, v4, v2
-; P9BE: vperm v4, v5, v4, v3
-; P9BE: vperm v2, v5, v0, v2
-; P9BE: vperm v3, v5, v0, v3
-; P9BE: vabsduw v3, v4, v3
-; P9BE: vabsduw v2, v1, v2
-; P9BE: vadduwm v2, v2, v3
+; P9BE: addis r3, r2,
+; P9BE: addi r3, r3,
+; P9BE: lxvx v4, 0, r3
+; P9BE: addis r3, r2,
+; P9BE: addi r3, r3,
+; P9BE: xxlor v2, vs0, vs0
+; P9BE: lfd f0, 0(r4)
+; P9BE: lxvx v0, 0, r3
+; P9BE: xxlxor v3, v3, v3
+; P9BE: xxlor v1, vs0, vs0
+; P9BE: vperm v5, v3, v2, v4
+; P9BE: vperm v2, v3, v2, v0
+; P9BE: vperm v0, v3, v1, v0
+; P9BE: vperm v3, v3, v1, v4
+; P9BE: vabsduw v2, v2, v0
+; P9BE: vabsduw v3, v5, v3
+; P9BE: vadduwm v2, v3, v2
; P9BE: xxswapd v3, v2
; P9BE: vadduwm v2, v2, v3
; P9BE: xxspltw v3, v2, 1
; P9LE: # %bb.0:
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-NEXT: lfiwzx f1, 0, r4
-; P9LE-NEXT: mr r3, r5
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
; P9LE-NEXT: xvsubsp vs0, vs0, vs1
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P9LE-NEXT: xscvspdpn f0, vs0
+; P9LE-NEXT: mr r3, r5
; P9LE-NEXT: stfs f0, 0(r5)
; P9LE-NEXT: blr
; P9BE: # %bb.0:
; P9BE-NEXT: lfiwzx f0, 0, r3
; P9BE-NEXT: lfiwzx f1, 0, r4
-; P9BE-NEXT: mr r3, r5
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
; P9BE-NEXT: xvsubsp vs0, vs0, vs1
; P9BE-NEXT: xscvspdpn f0, vs0
+; P9BE-NEXT: mr r3, r5
; P9BE-NEXT: stfs f0, 0(r5)
; P9BE-NEXT: blr
; P9LE-LABEL: s2v_test_f2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
-; P9LE-NEXT: xxspltw v2, v2, 2
-; P9LE-NEXT: lfiwzx f0, 0, r3
+; P9LE-DAG: xxspltw v2, v2, 2
+; P9LE-DAG: lfiwzx f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f2:
; P9BE: # %bb.0: # %entry
; P9BE: addi r3, r3, 4
-; P9BE: xxspltw v2, v2, 1
-; P9BE: lfiwzx f0, 0, r3
+; P9BE-DAG: xxspltw v2, v2, 1
+; P9BE-DAG: lfiwzx f0, 0, r3
; P9BE-NEXT: xxsldwi v3, f0, f0, 1
; P9BE: vmrghw v2, v3, v2
; P9BE-NEXT: blr
; P9LE-LABEL: s2v_test_f3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
-; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: lfiwzx f0, r3, r4
-; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-DAG: xxspltw v2, v2, 2
+; P9LE-DAG: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f3:
; P9BE: # %bb.0: # %entry
; P9BE: sldi r4, r7, 2
-; P9BE: xxspltw v2, v2, 1
; P9BE: lfiwzx f0, r3, r4
-; P9BE-NEXT: xxsldwi v3, f0, f0, 1
+; P9BE-DAG: xxspltw v2, v2, 1
+; P9BE-DAG: xxsldwi v3, f0, f0, 1
; P9BE: vmrghw v2, v3, v2
; P9BE-NEXT: blr
; P9LE-LABEL: s2v_test_f4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
-; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: lfiwzx f0, 0, r3
-; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-DAG: xxspltw v2, v2, 2
+; P9LE-DAG: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f4:
; P9BE: # %bb.0: # %entry
; P9BE: addi r3, r3, 4
-; P9BE: xxspltw v2, v2, 1
; P9BE: lfiwzx f0, 0, r3
-; P9BE-NEXT: xxsldwi v3, f0, f0, 1
+; P9BE-DAG: xxspltw v2, v2, 1
+; P9BE-DAG: xxsldwi v3, f0, f0, 1
; P9BE: vmrghw v2, v3, v2
; P9BE-NEXT: blr
; CHECK-LABEL: spConv2sdw_x
; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 3
+; CHECK-DAG: xscvdpsxds [[CONV:[0-9]+]], [[LD]]
; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]]
; CHECK-NEXT: blr
; CHECK-LABEL: spConv2sw_x
; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 2
+; CHECK-DAG: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]]
; CHECK-NEXT: blr
; CHECK-LABEL: spConv2shw_x
; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK: sldi [[REG:[0-9]+]], 5, 1
-; CHECK: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 1
+; CHECK-DAG: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]]
; CHECK-NEXT: blr
; CHECK-LABEL: spConv2udw_x
; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 3
+; CHECK-DAG: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]]
; CHECK-NEXT: blr
; CHECK-LABEL: spConv2uw_x
; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 2
+; CHECK-DAG: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]]
; CHECK-NEXT: blr
; CHECK-LABEL: spConv2uhw_x
; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK: sldi [[REG:[0-9]+]], 5, 1
-; CHECK: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 1
+; CHECK-DAG: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]]
; CHECK-NEXT: blr
; Function Attrs: norecurse nounwind writeonly
define void @initCombList(%0* nocapture, i32 signext) local_unnamed_addr #0 {
; CHECK-LABEL: initCombList:
-; CHECK: addi 3, 3, -8
-; CHECK-NEXT: stwu 5, 64(4)
+; CHECK: addi 4, 4, -8
+; CHECK: stwu 5, 64(3)
; CHECK-ITIN-LABEL: initCombList:
; CHECK-ITIN: stwu 5, 64(4)
; RUN: llc -relocation-model=pic -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
-; RUN: -mattr=-power9-vector < %s | FileCheck %s
+; RUN: -mattr=-power9-vector < %s | FileCheck %s --check-prefix=CHECK-P9-NOVECTOR
; These tests verify that VSX swap optimization works when loading a scalar
; into a vector register.
; CHECK: stxvd2x vs0, 0, r3
; CHECK: blr
;
+; CHECK-P9-NOVECTOR-LABEL: bar0:
+; CHECK-P9-NOVECTOR: # %bb.0: # %entry
+; CHECK-P9-NOVECTOR: addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR: ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR: xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR: blr
+;
; CHECK-P9-LABEL: bar0:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9: addis r3, r2, .LC0@toc@ha
-; CHECK-P9: addis r4, r2, .LC1@toc@ha
; CHECK-P9: ld r3, .LC0@toc@l(r3)
-; CHECK-P9: ld r4, .LC1@toc@l(r4)
-; CHECK-P9: lfd f0, 0(r3)
-; CHECK-P9: lxvx vs1, 0, r4
+; CHECK-P9: lxvx vs0, 0, r3
+; CHECK-P9: addis r3, r2, .LC1@toc@ha
+; CHECK-P9: ld r3, .LC1@toc@l(r3)
+; CHECK-P9: lfd f1, 0(r3)
; CHECK-P9: addis r3, r2, .LC2@toc@ha
; CHECK-P9: ld r3, .LC2@toc@l(r3)
-; CHECK-P9: xxpermdi vs0, f0, f0, 2
-; CHECK-P9: xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9: xxpermdi vs1, f1, f1, 2
+; CHECK-P9: xxpermdi vs0, vs0, vs1, 1
; CHECK-P9: stxvx vs0, 0, r3
; CHECK-P9: blr
entry:
; CHECK: stxvd2x vs0, 0, r3
; CHECK: blr
;
+; CHECK-P9-NOVECTOR-LABEL: bar1:
+; CHECK-P9-NOVECTOR: # %bb.0: # %entry
+; CHECK-P9-NOVECTOR: addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR: ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR: xxmrghd vs0, vs0, vs1
+; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR: blr
+;
; CHECK-P9-LABEL: bar1:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9: addis r3, r2, .LC0@toc@ha
-; CHECK-P9: addis r4, r2, .LC1@toc@ha
; CHECK-P9: ld r3, .LC0@toc@l(r3)
-; CHECK-P9: ld r4, .LC1@toc@l(r4)
-; CHECK-P9: lfd f0, 0(r3)
-; CHECK-P9: lxvx vs1, 0, r4
+; CHECK-P9: lxvx vs0, 0, r3
+; CHECK-P9: addis r3, r2, .LC1@toc@ha
+; CHECK-P9: ld r3, .LC1@toc@l(r3)
+; CHECK-P9: lfd f1, 0(r3)
; CHECK-P9: addis r3, r2, .LC2@toc@ha
; CHECK-P9: ld r3, .LC2@toc@l(r3)
-; CHECK-P9: xxpermdi vs0, f0, f0, 2
-; CHECK-P9: xxmrgld vs0, vs0, vs1
+; CHECK-P9: xxpermdi vs1, f1, f1, 2
+; CHECK-P9: xxmrgld vs0, vs1, vs0
; CHECK-P9: stxvx vs0, 0, r3
; CHECK-P9: blr
entry:
ret void
; CHECK-P9-LABEL: @test8
; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
; CHECK-P8-LABEL: @test8
; CHECK-P8: vperm
ret void
; CHECK-P9-LABEL: @test4
; CHECK-P9: vperm
-; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
; CHECK-P8-LABEL: @test4
; CHECK-P8: vperm
ret void
; CHECK-P9-LABEL: @stest8
; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vextsh2d
-; CHECK-P9: vextsh2d
-; CHECK-P9: vextsh2d
; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
}
ret void
; CHECK-P9-LABEL: @stest4
; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vextsh2d
; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
}
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxswapd v2, vs1
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f1
; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
; CHECK-BE-NEXT: vmrghh v2, v2, v3
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: mfvsrwz r5, f3
; CHECK-P9-NEXT: mfvsrwz r3, f0
-; CHECK-P9-NEXT: mfvsrwz r4, f1
-; CHECK-P9-NEXT: mfvsrwz r6, f2
-; CHECK-P9-NEXT: mtvsrd f2, r5
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, v2
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, v2
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglh v2, v4, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT: xxswapd vs1, v2
-; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT: xscvspdpn f3, v2
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: mfvsrwz r5, f3
-; CHECK-BE-NEXT: sldi r5, r5, 48
; CHECK-BE-NEXT: mfvsrwz r3, f0
-; CHECK-BE-NEXT: mfvsrwz r4, f1
-; CHECK-BE-NEXT: mfvsrwz r6, f2
-; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: xxswapd vs0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xscvspdpn f0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: vmrghh v2, v4, v2
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs3, vs1
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs0
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: mfvsrwz r5, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: mfvsrwz r3, f2
-; CHECK-P9-NEXT: mfvsrwz r4, f3
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: xxswapd vs2, vs1
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: xscvspdpn f2, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
-; CHECK-P9-NEXT: vmrglh v4, v1, v0
-; CHECK-P9-NEXT: vmrglh v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd vs1, vs0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvspdpn f1, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs3, vs1
-; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs6, vs0
-; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r5, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
-; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: mfvsrwz r3, f2
-; CHECK-BE-NEXT: mfvsrwz r4, f3
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v6, r9
+; CHECK-BE-NEXT: xxswapd vs2, vs1
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
-; CHECK-BE-NEXT: vmrghh v4, v1, v0
-; CHECK-BE-NEXT: vmrghh v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: sldi r3, r3, 48
; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 16(r4)
-; CHECK-P9-NEXT: lxv vs3, 0(r4)
-; CHECK-P9-NEXT: lxv vs0, 48(r4)
-; CHECK-P9-NEXT: lxv vs1, 32(r4)
-; CHECK-P9-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT: xxswapd vs5, vs3
-; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd vs8, vs2
-; CHECK-P9-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs11, vs1
-; CHECK-P9-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: lxv vs3, 16(r4)
+; CHECK-P9-NEXT: xscvspdpn f5, vs1
+; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT: xscvspdpn f8, vs3
+; CHECK-P9-NEXT: xxswapd vs4, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1
; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
+; CHECK-P9-NEXT: xscvdpsxws f5, f5
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f8, f8
+; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3
+; CHECK-P9-NEXT: xxswapd vs7, vs3
; CHECK-P9-NEXT: xscvspdpn f6, vs6
+; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1
; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
-; CHECK-P9-NEXT: xscvspdpn f9, vs9
-; CHECK-P9-NEXT: xscvspdpn f10, vs10
-; CHECK-P9-NEXT: xscvspdpn f11, vs11
-; CHECK-P9-NEXT: xscvspdpn f12, vs12
-; CHECK-P9-NEXT: xscvspdpn f13, vs13
-; CHECK-P9-NEXT: xscvspdpn v2, v2
-; CHECK-P9-NEXT: xscvspdpn v3, v3
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
; CHECK-P9-NEXT: xscvdpsxws f6, f6
+; CHECK-P9-NEXT: mfvsrwz r5, f5
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
+; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mtvsrd f5, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f8
+; CHECK-P9-NEXT: mtvsrd f8, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f2
+; CHECK-P9-NEXT: lxv vs0, 32(r4)
+; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3
+; CHECK-P9-NEXT: xxswapd vs10, vs0
+; CHECK-P9-NEXT: xscvspdpn f9, vs9
+; CHECK-P9-NEXT: xscvspdpn f10, vs10
; CHECK-P9-NEXT: xscvdpsxws f9, f9
; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r4, f3
-; CHECK-P9-NEXT: mfvsrwz r5, f2
-; CHECK-P9-NEXT: mfvsrwz r12, f1
-; CHECK-P9-NEXT: mfvsrwz r0, f0
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r9, f7
-; CHECK-P9-NEXT: mfvsrwz r10, f8
-; CHECK-P9-NEXT: mfvsrwz r11, f9
-; CHECK-P9-NEXT: mfvsrwz r30, f10
-; CHECK-P9-NEXT: mfvsrwz r29, f11
-; CHECK-P9-NEXT: mfvsrwz r28, f12
-; CHECK-P9-NEXT: mfvsrwz r27, f13
-; CHECK-P9-NEXT: mfvsrwz r26, v2
-; CHECK-P9-NEXT: mfvsrwz r25, v3
-; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: mtvsrd f2, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f4
+; CHECK-P9-NEXT: mtvsrd f4, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f1
; CHECK-P9-NEXT: mtvsrd f1, r5
-; CHECK-P9-NEXT: mtvsrd f8, r12
-; CHECK-P9-NEXT: mtvsrd f9, r0
-; CHECK-P9-NEXT: mtvsrd f2, r6
-; CHECK-P9-NEXT: mtvsrd f3, r7
-; CHECK-P9-NEXT: mtvsrd f4, r8
-; CHECK-P9-NEXT: mtvsrd f5, r9
-; CHECK-P9-NEXT: mtvsrd f6, r10
-; CHECK-P9-NEXT: mtvsrd f7, r11
-; CHECK-P9-NEXT: mtvsrd f10, r30
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f11, r29
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f12, r28
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f13, r27
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v2, r26
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v3, r25
-; CHECK-P9-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: xxswapd v5, vs2
+; CHECK-P9-NEXT: mfvsrwz r5, f6
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: xxswapd v3, vs4
+; CHECK-P9-NEXT: xscvspdpn f2, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mtvsrd f6, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f7
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: lxv vs1, 48(r4)
+; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs5
+; CHECK-P9-NEXT: mtvsrd f7, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f3
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs6
+; CHECK-P9-NEXT: xxswapd v5, vs7
+; CHECK-P9-NEXT: mtvsrd f3, r5
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xxswapd v0, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs6
-; CHECK-P9-NEXT: xxswapd v8, vs1
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs10
-; CHECK-P9-NEXT: xxswapd v11, vs11
-; CHECK-P9-NEXT: xxswapd v12, vs8
-; CHECK-P9-NEXT: xxswapd v13, vs12
-; CHECK-P9-NEXT: xxswapd v14, vs13
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs9
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglh v5, v0, v5
-; CHECK-P9-NEXT: vmrglh v4, v4, v1
-; CHECK-P9-NEXT: vmrglh v0, v7, v6
-; CHECK-P9-NEXT: vmrglh v1, v8, v9
-; CHECK-P9-NEXT: vmrglh v6, v11, v10
-; CHECK-P9-NEXT: vmrglh v7, v12, v13
-; CHECK-P9-NEXT: vmrglh v2, v2, v14
-; CHECK-P9-NEXT: vmrglh v3, v15, v3
-; CHECK-P9-NEXT: vmrglw v4, v4, v5
-; CHECK-P9-NEXT: vmrglw v5, v1, v0
-; CHECK-P9-NEXT: vmrglw v0, v7, v6
+; CHECK-P9-NEXT: vmrglh v4, v5, v4
+; CHECK-P9-NEXT: xxswapd v5, vs8
+; CHECK-P9-NEXT: vmrglh v5, v5, v0
+; CHECK-P9-NEXT: mfvsrwz r4, f2
+; CHECK-P9-NEXT: mtvsrd f2, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f0
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT: xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: xxmrgld vs2, v3, v2
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: vmrglh v2, v4, v2
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs1
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r5, f9
+; CHECK-P9-NEXT: mtvsrd f9, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f10
+; CHECK-P9-NEXT: mtvsrd f10, r5
+; CHECK-P9-NEXT: xxswapd v0, vs9
+; CHECK-P9-NEXT: xxswapd v1, vs10
+; CHECK-P9-NEXT: vmrglh v0, v1, v0
+; CHECK-P9-NEXT: vmrglw v2, v2, v0
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 0(r4)
-; CHECK-BE-NEXT: lxv vs3, 16(r4)
-; CHECK-BE-NEXT: lxv vs0, 32(r4)
-; CHECK-BE-NEXT: lxv vs1, 48(r4)
-; CHECK-BE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs5, vs3
-; CHECK-BE-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd vs8, vs2
-; CHECK-BE-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs11, vs1
-; CHECK-BE-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd v2, vs0
-; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: lxv vs1, 16(r4)
+; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f9, vs9
-; CHECK-BE-NEXT: xscvspdpn f10, vs10
-; CHECK-BE-NEXT: xscvspdpn f11, vs11
-; CHECK-BE-NEXT: xscvspdpn f12, vs12
-; CHECK-BE-NEXT: xscvspdpn f13, vs13
-; CHECK-BE-NEXT: xscvspdpn v2, v2
-; CHECK-BE-NEXT: xscvspdpn v3, v3
+; CHECK-BE-NEXT: xxswapd vs3, vs1
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mfvsrwz r5, f2
+; CHECK-BE-NEXT: xscvspdpn f4, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v2, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f4
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v3, r5
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v3, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f2
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r5, r5, 48
; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v5, r5
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: vmrghh v4, v5, v4
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: lxv vs1, 48(r4)
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v5, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f0
+; CHECK-BE-NEXT: lxv vs0, 32(r4)
+; CHECK-BE-NEXT: xscvspdpn f5, vs1
+; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
-; CHECK-BE-NEXT: mfvsrwz r4, f3
-; CHECK-BE-NEXT: mfvsrwz r5, f2
-; CHECK-BE-NEXT: mfvsrwz r12, f1
-; CHECK-BE-NEXT: mfvsrwz r0, f0
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r9, f7
-; CHECK-BE-NEXT: mfvsrwz r10, f8
-; CHECK-BE-NEXT: mfvsrwz r11, f9
-; CHECK-BE-NEXT: mfvsrwz r30, f10
-; CHECK-BE-NEXT: mfvsrwz r29, f11
-; CHECK-BE-NEXT: mfvsrwz r28, f12
-; CHECK-BE-NEXT: mfvsrwz r27, f13
-; CHECK-BE-NEXT: mfvsrwz r26, v2
-; CHECK-BE-NEXT: mfvsrwz r25, v3
-; CHECK-BE-NEXT: sldi r4, r4, 48
; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r12, r12, 48
-; CHECK-BE-NEXT: sldi r0, r0, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
-; CHECK-BE-NEXT: sldi r11, r11, 48
-; CHECK-BE-NEXT: sldi r30, r30, 48
-; CHECK-BE-NEXT: sldi r29, r29, 48
-; CHECK-BE-NEXT: sldi r28, r28, 48
-; CHECK-BE-NEXT: sldi r27, r27, 48
-; CHECK-BE-NEXT: sldi r26, r26, 48
-; CHECK-BE-NEXT: sldi r25, r25, 48
+; CHECK-BE-NEXT: xxswapd vs3, vs1
+; CHECK-BE-NEXT: mtvsrd v0, r5
+; CHECK-BE-NEXT: vmrghh v5, v5, v0
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mfvsrwz r4, f5
+; CHECK-BE-NEXT: xxmrghd vs4, v3, v2
+; CHECK-BE-NEXT: sldi r4, r4, 48
; CHECK-BE-NEXT: mtvsrd v2, r4
-; CHECK-BE-NEXT: mtvsrd v3, r5
-; CHECK-BE-NEXT: mtvsrd v10, r12
-; CHECK-BE-NEXT: mtvsrd v14, r0
-; CHECK-BE-NEXT: mtvsrd v4, r6
-; CHECK-BE-NEXT: mtvsrd v5, r7
-; CHECK-BE-NEXT: mtvsrd v0, r8
-; CHECK-BE-NEXT: mtvsrd v1, r9
-; CHECK-BE-NEXT: mtvsrd v6, r10
-; CHECK-BE-NEXT: mtvsrd v7, r11
-; CHECK-BE-NEXT: mtvsrd v8, r30
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v9, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r28
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v12, r27
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v13, r26
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v15, r25
-; CHECK-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghh v4, v5, v4
-; CHECK-BE-NEXT: vmrghh v2, v2, v0
-; CHECK-BE-NEXT: vmrghh v5, v6, v1
-; CHECK-BE-NEXT: vmrghh v3, v3, v7
-; CHECK-BE-NEXT: vmrghh v0, v9, v8
-; CHECK-BE-NEXT: vmrghh v1, v10, v11
-; CHECK-BE-NEXT: vmrghh v6, v13, v12
-; CHECK-BE-NEXT: vmrghh v7, v14, v15
-; CHECK-BE-NEXT: vmrghw v2, v2, v4
-; CHECK-BE-NEXT: vmrghw v3, v3, v5
-; CHECK-BE-NEXT: vmrghw v4, v1, v0
-; CHECK-BE-NEXT: vmrghw v5, v7, v6
-; CHECK-BE-NEXT: xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT: xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: blr
+; CHECK-BE-NEXT: mfvsrwz r4, f2
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: stxv vs4, 0(r3)
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f3
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghh v2, v2, v4
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f0
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v5, r4
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
+; CHECK-BE-NEXT: xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
%1 = fptoui <16 x float> %a to <16 x i16>
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxswapd v2, vs1
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f1
; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
; CHECK-BE-NEXT: vmrghh v2, v2, v3
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: mfvsrwz r5, f3
; CHECK-P9-NEXT: mfvsrwz r3, f0
-; CHECK-P9-NEXT: mfvsrwz r4, f1
-; CHECK-P9-NEXT: mfvsrwz r6, f2
-; CHECK-P9-NEXT: mtvsrd f2, r5
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, v2
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, v2
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglh v2, v4, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT: xxswapd vs1, v2
-; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT: xscvspdpn f3, v2
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: mfvsrwz r5, f3
-; CHECK-BE-NEXT: sldi r5, r5, 48
; CHECK-BE-NEXT: mfvsrwz r3, f0
-; CHECK-BE-NEXT: mfvsrwz r4, f1
-; CHECK-BE-NEXT: mfvsrwz r6, f2
-; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: xxswapd vs0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xscvspdpn f0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: vmrghh v2, v4, v2
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs3, vs1
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs0
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: mfvsrwz r5, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: mfvsrwz r3, f2
-; CHECK-P9-NEXT: mfvsrwz r4, f3
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: xxswapd vs2, vs1
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: xscvspdpn f2, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
-; CHECK-P9-NEXT: vmrglh v4, v1, v0
-; CHECK-P9-NEXT: vmrglh v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd vs1, vs0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvspdpn f1, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs3, vs1
-; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs6, vs0
-; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r5, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
-; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: mfvsrwz r3, f2
-; CHECK-BE-NEXT: mfvsrwz r4, f3
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v6, r9
+; CHECK-BE-NEXT: xxswapd vs2, vs1
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
-; CHECK-BE-NEXT: vmrghh v4, v1, v0
-; CHECK-BE-NEXT: vmrghh v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: sldi r3, r3, 48
; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 16(r4)
-; CHECK-P9-NEXT: lxv vs3, 0(r4)
-; CHECK-P9-NEXT: lxv vs0, 48(r4)
-; CHECK-P9-NEXT: lxv vs1, 32(r4)
-; CHECK-P9-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT: xxswapd vs5, vs3
-; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd vs8, vs2
-; CHECK-P9-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs11, vs1
-; CHECK-P9-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: lxv vs3, 16(r4)
+; CHECK-P9-NEXT: xscvspdpn f5, vs1
+; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT: xscvspdpn f8, vs3
+; CHECK-P9-NEXT: xxswapd vs4, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1
; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
+; CHECK-P9-NEXT: xscvdpsxws f5, f5
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f8, f8
+; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3
+; CHECK-P9-NEXT: xxswapd vs7, vs3
; CHECK-P9-NEXT: xscvspdpn f6, vs6
+; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1
; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
-; CHECK-P9-NEXT: xscvspdpn f9, vs9
-; CHECK-P9-NEXT: xscvspdpn f10, vs10
-; CHECK-P9-NEXT: xscvspdpn f11, vs11
-; CHECK-P9-NEXT: xscvspdpn f12, vs12
-; CHECK-P9-NEXT: xscvspdpn f13, vs13
-; CHECK-P9-NEXT: xscvspdpn v2, v2
-; CHECK-P9-NEXT: xscvspdpn v3, v3
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
; CHECK-P9-NEXT: xscvdpsxws f6, f6
+; CHECK-P9-NEXT: mfvsrwz r5, f5
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
+; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mtvsrd f5, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f8
+; CHECK-P9-NEXT: mtvsrd f8, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f2
+; CHECK-P9-NEXT: lxv vs0, 32(r4)
+; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3
+; CHECK-P9-NEXT: xxswapd vs10, vs0
+; CHECK-P9-NEXT: xscvspdpn f9, vs9
+; CHECK-P9-NEXT: xscvspdpn f10, vs10
; CHECK-P9-NEXT: xscvdpsxws f9, f9
; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r4, f3
-; CHECK-P9-NEXT: mfvsrwz r5, f2
-; CHECK-P9-NEXT: mfvsrwz r12, f1
-; CHECK-P9-NEXT: mfvsrwz r0, f0
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r9, f7
-; CHECK-P9-NEXT: mfvsrwz r10, f8
-; CHECK-P9-NEXT: mfvsrwz r11, f9
-; CHECK-P9-NEXT: mfvsrwz r30, f10
-; CHECK-P9-NEXT: mfvsrwz r29, f11
-; CHECK-P9-NEXT: mfvsrwz r28, f12
-; CHECK-P9-NEXT: mfvsrwz r27, f13
-; CHECK-P9-NEXT: mfvsrwz r26, v2
-; CHECK-P9-NEXT: mfvsrwz r25, v3
-; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: mtvsrd f2, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f4
+; CHECK-P9-NEXT: mtvsrd f4, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f1
; CHECK-P9-NEXT: mtvsrd f1, r5
-; CHECK-P9-NEXT: mtvsrd f8, r12
-; CHECK-P9-NEXT: mtvsrd f9, r0
-; CHECK-P9-NEXT: mtvsrd f2, r6
-; CHECK-P9-NEXT: mtvsrd f3, r7
-; CHECK-P9-NEXT: mtvsrd f4, r8
-; CHECK-P9-NEXT: mtvsrd f5, r9
-; CHECK-P9-NEXT: mtvsrd f6, r10
-; CHECK-P9-NEXT: mtvsrd f7, r11
-; CHECK-P9-NEXT: mtvsrd f10, r30
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f11, r29
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f12, r28
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f13, r27
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v2, r26
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v3, r25
-; CHECK-P9-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: xxswapd v5, vs2
+; CHECK-P9-NEXT: mfvsrwz r5, f6
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: xxswapd v3, vs4
+; CHECK-P9-NEXT: xscvspdpn f2, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mtvsrd f6, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f7
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: lxv vs1, 48(r4)
+; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs5
+; CHECK-P9-NEXT: mtvsrd f7, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f3
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs6
+; CHECK-P9-NEXT: xxswapd v5, vs7
+; CHECK-P9-NEXT: mtvsrd f3, r5
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xxswapd v0, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs6
-; CHECK-P9-NEXT: xxswapd v8, vs1
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs10
-; CHECK-P9-NEXT: xxswapd v11, vs11
-; CHECK-P9-NEXT: xxswapd v12, vs8
-; CHECK-P9-NEXT: xxswapd v13, vs12
-; CHECK-P9-NEXT: xxswapd v14, vs13
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs9
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglh v5, v0, v5
-; CHECK-P9-NEXT: vmrglh v4, v4, v1
-; CHECK-P9-NEXT: vmrglh v0, v7, v6
-; CHECK-P9-NEXT: vmrglh v1, v8, v9
-; CHECK-P9-NEXT: vmrglh v6, v11, v10
-; CHECK-P9-NEXT: vmrglh v7, v12, v13
-; CHECK-P9-NEXT: vmrglh v2, v2, v14
-; CHECK-P9-NEXT: vmrglh v3, v15, v3
-; CHECK-P9-NEXT: vmrglw v4, v4, v5
-; CHECK-P9-NEXT: vmrglw v5, v1, v0
-; CHECK-P9-NEXT: vmrglw v0, v7, v6
+; CHECK-P9-NEXT: vmrglh v4, v5, v4
+; CHECK-P9-NEXT: xxswapd v5, vs8
+; CHECK-P9-NEXT: vmrglh v5, v5, v0
+; CHECK-P9-NEXT: mfvsrwz r4, f2
+; CHECK-P9-NEXT: mtvsrd f2, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f0
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT: xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: xxmrgld vs2, v3, v2
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: vmrglh v2, v4, v2
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs1
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r5, f9
+; CHECK-P9-NEXT: mtvsrd f9, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f10
+; CHECK-P9-NEXT: mtvsrd f10, r5
+; CHECK-P9-NEXT: xxswapd v0, vs9
+; CHECK-P9-NEXT: xxswapd v1, vs10
+; CHECK-P9-NEXT: vmrglh v0, v1, v0
+; CHECK-P9-NEXT: vmrglw v2, v2, v0
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 0(r4)
-; CHECK-BE-NEXT: lxv vs3, 16(r4)
-; CHECK-BE-NEXT: lxv vs0, 32(r4)
-; CHECK-BE-NEXT: lxv vs1, 48(r4)
-; CHECK-BE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs5, vs3
-; CHECK-BE-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd vs8, vs2
-; CHECK-BE-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs11, vs1
-; CHECK-BE-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd v2, vs0
-; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: lxv vs1, 16(r4)
+; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f9, vs9
-; CHECK-BE-NEXT: xscvspdpn f10, vs10
-; CHECK-BE-NEXT: xscvspdpn f11, vs11
-; CHECK-BE-NEXT: xscvspdpn f12, vs12
-; CHECK-BE-NEXT: xscvspdpn f13, vs13
-; CHECK-BE-NEXT: xscvspdpn v2, v2
-; CHECK-BE-NEXT: xscvspdpn v3, v3
+; CHECK-BE-NEXT: xxswapd vs3, vs1
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mfvsrwz r5, f2
+; CHECK-BE-NEXT: xscvspdpn f4, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v2, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f4
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v3, r5
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v3, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f2
; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v5, r5
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: vmrghh v4, v5, v4
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: lxv vs1, 48(r4)
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v5, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f0
+; CHECK-BE-NEXT: lxv vs0, 32(r4)
+; CHECK-BE-NEXT: xscvspdpn f5, vs1
+; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
-; CHECK-BE-NEXT: mfvsrwz r4, f3
-; CHECK-BE-NEXT: mfvsrwz r5, f2
-; CHECK-BE-NEXT: mfvsrwz r12, f1
-; CHECK-BE-NEXT: mfvsrwz r0, f0
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r9, f7
-; CHECK-BE-NEXT: mfvsrwz r10, f8
-; CHECK-BE-NEXT: mfvsrwz r11, f9
-; CHECK-BE-NEXT: mfvsrwz r30, f10
-; CHECK-BE-NEXT: mfvsrwz r29, f11
-; CHECK-BE-NEXT: mfvsrwz r28, f12
-; CHECK-BE-NEXT: mfvsrwz r27, f13
-; CHECK-BE-NEXT: mfvsrwz r26, v2
-; CHECK-BE-NEXT: mfvsrwz r25, v3
-; CHECK-BE-NEXT: sldi r4, r4, 48
; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r12, r12, 48
-; CHECK-BE-NEXT: sldi r0, r0, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
-; CHECK-BE-NEXT: sldi r11, r11, 48
-; CHECK-BE-NEXT: sldi r30, r30, 48
-; CHECK-BE-NEXT: sldi r29, r29, 48
-; CHECK-BE-NEXT: sldi r28, r28, 48
-; CHECK-BE-NEXT: sldi r27, r27, 48
-; CHECK-BE-NEXT: sldi r26, r26, 48
-; CHECK-BE-NEXT: sldi r25, r25, 48
+; CHECK-BE-NEXT: xxswapd vs3, vs1
+; CHECK-BE-NEXT: mtvsrd v0, r5
+; CHECK-BE-NEXT: vmrghh v5, v5, v0
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mfvsrwz r4, f5
+; CHECK-BE-NEXT: xxmrghd vs4, v3, v2
+; CHECK-BE-NEXT: sldi r4, r4, 48
; CHECK-BE-NEXT: mtvsrd v2, r4
-; CHECK-BE-NEXT: mtvsrd v3, r5
-; CHECK-BE-NEXT: mtvsrd v10, r12
-; CHECK-BE-NEXT: mtvsrd v14, r0
-; CHECK-BE-NEXT: mtvsrd v4, r6
-; CHECK-BE-NEXT: mtvsrd v5, r7
-; CHECK-BE-NEXT: mtvsrd v0, r8
-; CHECK-BE-NEXT: mtvsrd v1, r9
-; CHECK-BE-NEXT: mtvsrd v6, r10
-; CHECK-BE-NEXT: mtvsrd v7, r11
-; CHECK-BE-NEXT: mtvsrd v8, r30
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v9, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r28
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v12, r27
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v13, r26
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v15, r25
-; CHECK-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghh v4, v5, v4
-; CHECK-BE-NEXT: vmrghh v2, v2, v0
-; CHECK-BE-NEXT: vmrghh v5, v6, v1
-; CHECK-BE-NEXT: vmrghh v3, v3, v7
-; CHECK-BE-NEXT: vmrghh v0, v9, v8
-; CHECK-BE-NEXT: vmrghh v1, v10, v11
-; CHECK-BE-NEXT: vmrghh v6, v13, v12
-; CHECK-BE-NEXT: vmrghh v7, v14, v15
-; CHECK-BE-NEXT: vmrghw v2, v2, v4
-; CHECK-BE-NEXT: vmrghw v3, v3, v5
-; CHECK-BE-NEXT: vmrghw v4, v1, v0
-; CHECK-BE-NEXT: vmrghw v5, v7, v6
+; CHECK-BE-NEXT: mfvsrwz r4, f2
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: stxv vs4, 0(r3)
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f3
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghh v2, v2, v4
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f0
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v5, r4
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT: xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xvcvdpuxds v2, vs0
; CHECK-BE-NEXT: blr
entry:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT: xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT: xscvspdpn f1, v2
+; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 1
+; CHECK-BE-NEXT: xscvspdpn f0, v2
; CHECK-BE-NEXT: xxswapd vs2, v2
-; CHECK-BE-NEXT: xscvspdpn f3, v2
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0
-; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = fptoui <4 x float> %a to <4 x i64>
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs3, vs1
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs0
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xxswapd vs2, vs0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvspdpn f3, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT: lxv vs2, 16(r4)
+; CHECK-P9-NEXT: xxmrghd vs0, vs3, vs0
+; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3
+; CHECK-P9-NEXT: xxswapd vs4, vs2
; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs4
-; CHECK-P9-NEXT: xxmrghd vs3, vs6, vs5
-; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs7
-; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: xxmrghd vs3, vs4, vs3
+; CHECK-P9-NEXT: xscvspdpn f4, vs2
+; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT: stxv vs0, 48(r3)
+; CHECK-P9-NEXT: xxmrghd vs2, vs4, vs2
+; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: stxv vs3, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs2, 0(r3)
+; CHECK-P9-NEXT: stxv vs2, 48(r3)
+; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 1
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: lxv vs0, 16(r4)
+; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs4, vs1
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 1
-; CHECK-BE-NEXT: xxsldwi vs6, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs7, vs0
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs3
+; CHECK-BE-NEXT: xscvspdpn f3, vs0
+; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs4
+; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 3
+; CHECK-BE-NEXT: xxswapd vs0, vs0
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
-; CHECK-BE-NEXT: xxmrghd vs2, vs4, vs3
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
-; CHECK-BE-NEXT: xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs0, 32(r3)
-; CHECK-BE-NEXT: stxv vs2, 16(r3)
-; CHECK-BE-NEXT: stxv vs1, 0(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs0, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x float>, <8 x float>* %0, align 32
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: lxv vs3, 32(r4)
-; CHECK-P9-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs5, vs1
-; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs8, vs0
-; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 1
-; CHECK-P9-NEXT: xxsldwi vs10, vs3, vs3, 3
-; CHECK-P9-NEXT: xxswapd vs11, vs3
-; CHECK-P9-NEXT: xxsldwi vs12, vs3, vs3, 1
-; CHECK-P9-NEXT: xxsldwi vs13, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd v2, vs2
-; CHECK-P9-NEXT: xxsldwi v3, vs2, vs2, 1
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
+; CHECK-P9-NEXT: lxv vs4, 16(r4)
+; CHECK-P9-NEXT: xxsldwi vs5, vs4, vs4, 3
+; CHECK-P9-NEXT: xxswapd vs6, vs4
+; CHECK-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xxswapd vs2, vs0
; CHECK-P9-NEXT: xscvspdpn f5, vs5
; CHECK-P9-NEXT: xscvspdpn f6, vs6
+; CHECK-P9-NEXT: xxmrghd vs5, vs6, vs5
+; CHECK-P9-NEXT: xscvspdpn f6, vs4
+; CHECK-P9-NEXT: xxsldwi vs4, vs4, vs4, 1
+; CHECK-P9-NEXT: lxv vs3, 32(r4)
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xxswapd vs7, vs3
; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
-; CHECK-P9-NEXT: xscvspdpn f9, vs9
-; CHECK-P9-NEXT: xscvspdpn f10, vs10
-; CHECK-P9-NEXT: xscvspdpn f11, vs11
-; CHECK-P9-NEXT: xscvspdpn f12, vs12
-; CHECK-P9-NEXT: xscvspdpn f13, vs13
-; CHECK-P9-NEXT: xscvspdpn f31, v2
-; CHECK-P9-NEXT: xscvspdpn f30, v3
-; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
-; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs6
-; CHECK-P9-NEXT: xxmrghd vs5, vs8, vs7
-; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs9
-; CHECK-P9-NEXT: xxmrghd vs6, vs11, vs10
-; CHECK-P9-NEXT: xxmrghd vs3, vs3, vs12
-; CHECK-P9-NEXT: xxmrghd vs7, vs31, vs13
-; CHECK-P9-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxmrghd vs2, vs2, vs30
-; CHECK-P9-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT: xscvspdpn f4, vs4
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT: xscvspdpn f2, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xxmrghd vs0, vs2, vs0
+; CHECK-P9-NEXT: xxmrghd vs4, vs6, vs4
+; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
+; CHECK-P9-NEXT: xscvspdpn f6, vs6
+; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT: xscvspdpn f7, vs3
+; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1
+; CHECK-P9-NEXT: lxv vs2, 48(r4)
+; CHECK-P9-NEXT: xxswapd vs8, vs2
+; CHECK-P9-NEXT: xscvspdpn f8, vs8
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: stxv vs5, 32(r3)
; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT: xscvspdpn f3, vs3
+; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3
+; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT: xscvspdpn f7, vs7
+; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7
+; CHECK-P9-NEXT: xscvspdpn f8, vs2
+; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT: stxv vs6, 64(r3)
+; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: stxv vs3, 80(r3)
+; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT: stxv vs0, 48(r3)
-; CHECK-P9-NEXT: stxv vs5, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs4, 0(r3)
; CHECK-P9-NEXT: stxv vs2, 112(r3)
; CHECK-P9-NEXT: stxv vs7, 96(r3)
-; CHECK-P9-NEXT: stxv vs3, 80(r3)
-; CHECK-P9-NEXT: stxv vs6, 64(r3)
+; CHECK-P9-NEXT: stxv vs4, 48(r3)
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
-; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: lxv vs2, 48(r4)
-; CHECK-BE-NEXT: lxv vs3, 32(r4)
-; CHECK-BE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs5, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs6, vs1
-; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT: xxsldwi vs8, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs9, vs0
-; CHECK-BE-NEXT: xxsldwi vs10, vs3, vs3, 1
-; CHECK-BE-NEXT: xxsldwi vs11, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs12, vs3
-; CHECK-BE-NEXT: xxsldwi vs13, vs2, vs2, 1
-; CHECK-BE-NEXT: xxsldwi v2, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd v3, vs2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: lxv vs4, 16(r4)
+; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT: xxswapd vs0, vs0
; CHECK-BE-NEXT: xscvspdpn f5, vs5
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 1
; CHECK-BE-NEXT: xscvspdpn f6, vs6
+; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
+; CHECK-BE-NEXT: xscvspdpn f5, vs4
+; CHECK-BE-NEXT: lxv vs3, 32(r4)
+; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 1
; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f9, vs9
-; CHECK-BE-NEXT: xscvspdpn f10, vs10
-; CHECK-BE-NEXT: xscvspdpn f11, vs11
-; CHECK-BE-NEXT: xscvspdpn f12, vs12
-; CHECK-BE-NEXT: xscvspdpn f13, vs13
-; CHECK-BE-NEXT: xscvspdpn f31, v2
-; CHECK-BE-NEXT: xscvspdpn f30, v3
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs4
-; CHECK-BE-NEXT: xxmrghd vs4, vs6, vs5
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs7
-; CHECK-BE-NEXT: xxmrghd vs5, vs9, vs8
-; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs10
-; CHECK-BE-NEXT: xxmrghd vs6, vs12, vs11
-; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs13
-; CHECK-BE-NEXT: xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: xxmrghd vs5, vs5, vs6
+; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 3
+; CHECK-BE-NEXT: xxswapd vs4, vs4
+; CHECK-BE-NEXT: xscvspdpn f6, vs6
+; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT: lxv vs2, 48(r4)
+; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 1
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT: xscvspdpn f8, vs8
+; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6
+; CHECK-BE-NEXT: xscvspdpn f6, vs3
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: xxmrghd vs6, vs6, vs7
+; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
+; CHECK-BE-NEXT: xscvspdpn f7, vs7
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs7
+; CHECK-BE-NEXT: xscvspdpn f7, vs2
+; CHECK-BE-NEXT: xxmrghd vs7, vs7, vs8
+; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 3
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: xscvspdpn f8, vs8
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs8
+; CHECK-BE-NEXT: stxv vs5, 32(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
-; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT: stxv vs5, 48(r3)
-; CHECK-BE-NEXT: stxv vs0, 32(r3)
-; CHECK-BE-NEXT: stxv vs4, 16(r3)
+; CHECK-BE-NEXT: stxv vs3, 80(r3)
+; CHECK-BE-NEXT: stxv vs7, 96(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT: stxv vs2, 112(r3)
+; CHECK-BE-NEXT: stxv vs6, 64(r3)
+; CHECK-BE-NEXT: stxv vs4, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
-; CHECK-BE-NEXT: stxv vs7, 112(r3)
-; CHECK-BE-NEXT: stxv vs2, 96(r3)
-; CHECK-BE-NEXT: stxv vs6, 80(r3)
-; CHECK-BE-NEXT: stxv vs3, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xvcvdpuxds v2, vs0
; CHECK-BE-NEXT: blr
entry:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT: xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT: xscvspdpn f1, v2
+; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 1
+; CHECK-BE-NEXT: xscvspdpn f0, v2
; CHECK-BE-NEXT: xxswapd vs2, v2
-; CHECK-BE-NEXT: xscvspdpn f3, v2
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0
-; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = fptoui <4 x float> %a to <4 x i64>
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs3, vs1
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs0
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xxswapd vs2, vs0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvspdpn f3, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT: lxv vs2, 16(r4)
+; CHECK-P9-NEXT: xxmrghd vs0, vs3, vs0
+; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3
+; CHECK-P9-NEXT: xxswapd vs4, vs2
; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs4
-; CHECK-P9-NEXT: xxmrghd vs3, vs6, vs5
-; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs7
-; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: xxmrghd vs3, vs4, vs3
+; CHECK-P9-NEXT: xscvspdpn f4, vs2
+; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT: stxv vs0, 48(r3)
+; CHECK-P9-NEXT: xxmrghd vs2, vs4, vs2
+; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: stxv vs3, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs2, 0(r3)
+; CHECK-P9-NEXT: stxv vs2, 48(r3)
+; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 1
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: lxv vs0, 16(r4)
+; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs4, vs1
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 1
-; CHECK-BE-NEXT: xxsldwi vs6, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs7, vs0
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs3
+; CHECK-BE-NEXT: xscvspdpn f3, vs0
+; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs4
+; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 3
+; CHECK-BE-NEXT: xxswapd vs0, vs0
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
-; CHECK-BE-NEXT: xxmrghd vs2, vs4, vs3
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
-; CHECK-BE-NEXT: xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs0, 32(r3)
-; CHECK-BE-NEXT: stxv vs2, 16(r3)
-; CHECK-BE-NEXT: stxv vs1, 0(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs0, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x float>, <8 x float>* %0, align 32
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: lxv vs3, 32(r4)
-; CHECK-P9-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs5, vs1
-; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs8, vs0
-; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 1
-; CHECK-P9-NEXT: xxsldwi vs10, vs3, vs3, 3
-; CHECK-P9-NEXT: xxswapd vs11, vs3
-; CHECK-P9-NEXT: xxsldwi vs12, vs3, vs3, 1
-; CHECK-P9-NEXT: xxsldwi vs13, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd v2, vs2
-; CHECK-P9-NEXT: xxsldwi v3, vs2, vs2, 1
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
+; CHECK-P9-NEXT: lxv vs4, 16(r4)
+; CHECK-P9-NEXT: xxsldwi vs5, vs4, vs4, 3
+; CHECK-P9-NEXT: xxswapd vs6, vs4
+; CHECK-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xxswapd vs2, vs0
; CHECK-P9-NEXT: xscvspdpn f5, vs5
; CHECK-P9-NEXT: xscvspdpn f6, vs6
+; CHECK-P9-NEXT: xxmrghd vs5, vs6, vs5
+; CHECK-P9-NEXT: xscvspdpn f6, vs4
+; CHECK-P9-NEXT: xxsldwi vs4, vs4, vs4, 1
+; CHECK-P9-NEXT: lxv vs3, 32(r4)
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xxswapd vs7, vs3
; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
-; CHECK-P9-NEXT: xscvspdpn f9, vs9
-; CHECK-P9-NEXT: xscvspdpn f10, vs10
-; CHECK-P9-NEXT: xscvspdpn f11, vs11
-; CHECK-P9-NEXT: xscvspdpn f12, vs12
-; CHECK-P9-NEXT: xscvspdpn f13, vs13
-; CHECK-P9-NEXT: xscvspdpn f31, v2
-; CHECK-P9-NEXT: xscvspdpn f30, v3
-; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
-; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs6
-; CHECK-P9-NEXT: xxmrghd vs5, vs8, vs7
-; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs9
-; CHECK-P9-NEXT: xxmrghd vs6, vs11, vs10
-; CHECK-P9-NEXT: xxmrghd vs3, vs3, vs12
-; CHECK-P9-NEXT: xxmrghd vs7, vs31, vs13
-; CHECK-P9-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxmrghd vs2, vs2, vs30
-; CHECK-P9-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT: xscvspdpn f4, vs4
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT: xscvspdpn f2, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xxmrghd vs0, vs2, vs0
+; CHECK-P9-NEXT: xxmrghd vs4, vs6, vs4
+; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
+; CHECK-P9-NEXT: xscvspdpn f6, vs6
+; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT: xscvspdpn f7, vs3
+; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1
+; CHECK-P9-NEXT: lxv vs2, 48(r4)
+; CHECK-P9-NEXT: xxswapd vs8, vs2
+; CHECK-P9-NEXT: xscvspdpn f8, vs8
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT: stxv vs5, 32(r3)
; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT: xscvspdpn f3, vs3
+; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3
+; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT: xscvspdpn f7, vs7
+; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7
+; CHECK-P9-NEXT: xscvspdpn f8, vs2
+; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT: stxv vs6, 64(r3)
+; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: stxv vs3, 80(r3)
+; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT: stxv vs0, 48(r3)
-; CHECK-P9-NEXT: stxv vs5, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs4, 0(r3)
; CHECK-P9-NEXT: stxv vs2, 112(r3)
; CHECK-P9-NEXT: stxv vs7, 96(r3)
-; CHECK-P9-NEXT: stxv vs3, 80(r3)
-; CHECK-P9-NEXT: stxv vs6, 64(r3)
+; CHECK-P9-NEXT: stxv vs4, 48(r3)
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
-; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: lxv vs2, 48(r4)
-; CHECK-BE-NEXT: lxv vs3, 32(r4)
-; CHECK-BE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs5, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs6, vs1
-; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT: xxsldwi vs8, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs9, vs0
-; CHECK-BE-NEXT: xxsldwi vs10, vs3, vs3, 1
-; CHECK-BE-NEXT: xxsldwi vs11, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs12, vs3
-; CHECK-BE-NEXT: xxsldwi vs13, vs2, vs2, 1
-; CHECK-BE-NEXT: xxsldwi v2, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd v3, vs2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: lxv vs4, 16(r4)
+; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT: xxswapd vs0, vs0
; CHECK-BE-NEXT: xscvspdpn f5, vs5
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 1
; CHECK-BE-NEXT: xscvspdpn f6, vs6
+; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
+; CHECK-BE-NEXT: xscvspdpn f5, vs4
+; CHECK-BE-NEXT: lxv vs3, 32(r4)
+; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 1
; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f9, vs9
-; CHECK-BE-NEXT: xscvspdpn f10, vs10
-; CHECK-BE-NEXT: xscvspdpn f11, vs11
-; CHECK-BE-NEXT: xscvspdpn f12, vs12
-; CHECK-BE-NEXT: xscvspdpn f13, vs13
-; CHECK-BE-NEXT: xscvspdpn f31, v2
-; CHECK-BE-NEXT: xscvspdpn f30, v3
-; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs4
-; CHECK-BE-NEXT: xxmrghd vs4, vs6, vs5
-; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs7
-; CHECK-BE-NEXT: xxmrghd vs5, vs9, vs8
-; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs10
-; CHECK-BE-NEXT: xxmrghd vs6, vs12, vs11
-; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs13
-; CHECK-BE-NEXT: xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: xxmrghd vs5, vs5, vs6
+; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 3
+; CHECK-BE-NEXT: xxswapd vs4, vs4
+; CHECK-BE-NEXT: xscvspdpn f6, vs6
+; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT: lxv vs2, 48(r4)
+; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 1
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT: xscvspdpn f8, vs8
+; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6
+; CHECK-BE-NEXT: xscvspdpn f6, vs3
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: xxmrghd vs6, vs6, vs7
+; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
+; CHECK-BE-NEXT: xscvspdpn f7, vs7
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs7
+; CHECK-BE-NEXT: xscvspdpn f7, vs2
+; CHECK-BE-NEXT: xxmrghd vs7, vs7, vs8
+; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 3
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: xscvspdpn f8, vs8
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs8
+; CHECK-BE-NEXT: stxv vs5, 32(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
-; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT: stxv vs5, 48(r3)
-; CHECK-BE-NEXT: stxv vs0, 32(r3)
-; CHECK-BE-NEXT: stxv vs4, 16(r3)
+; CHECK-BE-NEXT: stxv vs3, 80(r3)
+; CHECK-BE-NEXT: stxv vs7, 96(r3)
+; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT: stxv vs2, 112(r3)
+; CHECK-BE-NEXT: stxv vs6, 64(r3)
+; CHECK-BE-NEXT: stxv vs4, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
-; CHECK-BE-NEXT: stxv vs7, 112(r3)
-; CHECK-BE-NEXT: stxv vs2, 96(r3)
-; CHECK-BE-NEXT: stxv vs6, 80(r3)
-; CHECK-BE-NEXT: stxv vs3, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: addi r3, r1, -2
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxswapd v2, vs1
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: vmrglb v2, v3, v2
; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT: addi r3, r1, -2
; CHECK-P9-NEXT: stxsihx v2, 0, r3
; CHECK-P9-NEXT: lhz r3, -2(r1)
; CHECK-P9-NEXT: blr
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f1
; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: addi r3, r1, -2
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
; CHECK-BE-NEXT: vmrghb v2, v2, v3
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: stxsihx v2, 0, r3
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: mfvsrwz r5, f3
; CHECK-P9-NEXT: mfvsrwz r3, f0
-; CHECK-P9-NEXT: mfvsrwz r4, f1
-; CHECK-P9-NEXT: mfvsrwz r6, f2
-; CHECK-P9-NEXT: mtvsrd f2, r5
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, v2
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, v2
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: vmrglb v2, v3, v2
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
-; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: vmrglb v2, v4, v2
+; CHECK-P9-NEXT: vmrglh v2, v2, v3
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT: xxswapd vs1, v2
-; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT: xscvspdpn f3, v2
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: mfvsrwz r5, f3
-; CHECK-BE-NEXT: sldi r5, r5, 56
; CHECK-BE-NEXT: mfvsrwz r3, f0
-; CHECK-BE-NEXT: mfvsrwz r4, f1
-; CHECK-BE-NEXT: mfvsrwz r6, f2
-; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: xxswapd vs0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xscvspdpn f0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: vmrghb v2, v3, v2
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: vmrghb v2, v4, v2
+; CHECK-BE-NEXT: vmrghh v2, v2, v3
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs3, vs1
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs0
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: mfvsrwz r5, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: mfvsrwz r3, f2
-; CHECK-P9-NEXT: mfvsrwz r4, f3
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: xxswapd vs2, vs1
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: xscvspdpn f2, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrglb v2, v3, v2
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
-; CHECK-P9-NEXT: vmrglb v4, v1, v0
-; CHECK-P9-NEXT: vmrglb v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v5, v4
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd vs1, vs0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvspdpn f1, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs3, vs1
-; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs6, vs0
-; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r5, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: mfvsrwz r3, f2
-; CHECK-BE-NEXT: mfvsrwz r4, f3
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v6, r9
+; CHECK-BE-NEXT: xxswapd vs2, vs1
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: vmrghb v2, v3, v2
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
-; CHECK-BE-NEXT: vmrghb v4, v1, v0
-; CHECK-BE-NEXT: vmrghb v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 16(r3)
-; CHECK-P9-NEXT: lxv vs3, 0(r3)
-; CHECK-P9-NEXT: lxv vs0, 48(r3)
-; CHECK-P9-NEXT: lxv vs1, 32(r3)
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT: xxswapd vs5, vs3
-; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd vs8, vs2
-; CHECK-P9-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs11, vs1
-; CHECK-P9-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: lxv vs0, 0(r3)
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r4, f1
+; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: xxswapd v2, vs1
+; CHECK-P9-NEXT: xxswapd vs1, vs0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: lxv vs2, 48(r3)
+; CHECK-P9-NEXT: lxv vs3, 32(r3)
+; CHECK-P9-NEXT: lxv vs4, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xscvspdpn f1, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
-; CHECK-P9-NEXT: xscvspdpn f9, vs9
-; CHECK-P9-NEXT: xscvspdpn f10, vs10
-; CHECK-P9-NEXT: xscvspdpn f11, vs11
-; CHECK-P9-NEXT: xscvspdpn f12, vs12
-; CHECK-P9-NEXT: xscvspdpn f13, vs13
-; CHECK-P9-NEXT: xscvspdpn v2, v2
-; CHECK-P9-NEXT: xscvspdpn v3, v3
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
-; CHECK-P9-NEXT: xscvdpsxws f9, f9
-; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r3, f3
-; CHECK-P9-NEXT: mfvsrwz r4, f2
-; CHECK-P9-NEXT: mfvsrwz r11, f1
-; CHECK-P9-NEXT: mfvsrwz r12, f0
-; CHECK-P9-NEXT: mfvsrwz r5, f4
-; CHECK-P9-NEXT: mfvsrwz r6, f5
-; CHECK-P9-NEXT: mfvsrwz r7, f6
-; CHECK-P9-NEXT: mfvsrwz r8, f7
-; CHECK-P9-NEXT: mfvsrwz r9, f8
-; CHECK-P9-NEXT: mfvsrwz r10, f9
-; CHECK-P9-NEXT: mfvsrwz r0, f10
-; CHECK-P9-NEXT: mfvsrwz r30, f11
-; CHECK-P9-NEXT: mfvsrwz r29, f12
-; CHECK-P9-NEXT: mfvsrwz r28, f13
-; CHECK-P9-NEXT: mfvsrwz r27, v2
-; CHECK-P9-NEXT: mfvsrwz r26, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f8, r11
-; CHECK-P9-NEXT: mtvsrd f9, r12
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: mtvsrd f10, r0
-; CHECK-P9-NEXT: mtvsrd f11, r30
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f12, r29
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f13, r28
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v2, r27
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v3, r26
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: xxswapd v5, vs2
-; CHECK-P9-NEXT: xxswapd v0, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs6
-; CHECK-P9-NEXT: xxswapd v8, vs1
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs10
-; CHECK-P9-NEXT: xxswapd v11, vs11
-; CHECK-P9-NEXT: xxswapd v12, vs8
-; CHECK-P9-NEXT: xxswapd v13, vs12
-; CHECK-P9-NEXT: xxswapd v14, vs13
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs9
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglb v5, v0, v5
-; CHECK-P9-NEXT: vmrglb v4, v4, v1
-; CHECK-P9-NEXT: vmrglb v0, v7, v6
-; CHECK-P9-NEXT: vmrglb v1, v8, v9
-; CHECK-P9-NEXT: vmrglb v6, v11, v10
-; CHECK-P9-NEXT: vmrglb v7, v12, v13
-; CHECK-P9-NEXT: vmrglb v2, v2, v14
-; CHECK-P9-NEXT: vmrglb v3, v15, v3
-; CHECK-P9-NEXT: vmrglh v4, v4, v5
-; CHECK-P9-NEXT: vmrglh v5, v1, v0
-; CHECK-P9-NEXT: vmrglh v0, v7, v6
+; CHECK-P9-NEXT: xxsldwi vs0, vs4, vs4, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
-; CHECK-P9-NEXT: vmrglw v2, v2, v0
-; CHECK-P9-NEXT: xxmrgld v2, v2, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs4
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs4
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs4, vs4, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs3, vs3, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs3
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs3, vs3, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs2, vs2, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs2
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs2
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v4, v5, v4
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs2, vs2, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v0, vs0
+; CHECK-P9-NEXT: vmrglb v5, v5, v0
+; CHECK-P9-NEXT: vmrglh v4, v5, v4
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
+; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: xscvdpsxws f4, f4
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs5, vs3
-; CHECK-BE-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd vs8, vs2
-; CHECK-BE-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs11, vs1
-; CHECK-BE-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd v2, vs0
-; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: xxswapd vs4, vs3
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f9, vs9
-; CHECK-BE-NEXT: xscvspdpn f10, vs10
-; CHECK-BE-NEXT: xscvspdpn f11, vs11
-; CHECK-BE-NEXT: xscvspdpn f12, vs12
-; CHECK-BE-NEXT: xscvspdpn f13, vs13
-; CHECK-BE-NEXT: xscvspdpn v2, v2
-; CHECK-BE-NEXT: xscvspdpn v3, v3
+; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: xscvspdpn f4, vs3
+; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: vmrghb v2, v3, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs2
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvspdpn f3, vs2
+; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: mtvsrd v4, r3
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
; CHECK-BE-NEXT: mfvsrwz r3, f3
-; CHECK-BE-NEXT: mfvsrwz r4, f2
-; CHECK-BE-NEXT: mfvsrwz r11, f1
-; CHECK-BE-NEXT: mfvsrwz r12, f0
-; CHECK-BE-NEXT: mfvsrwz r5, f4
-; CHECK-BE-NEXT: mfvsrwz r6, f5
-; CHECK-BE-NEXT: mfvsrwz r7, f6
-; CHECK-BE-NEXT: mfvsrwz r8, f7
-; CHECK-BE-NEXT: mfvsrwz r9, f8
-; CHECK-BE-NEXT: mfvsrwz r10, f9
-; CHECK-BE-NEXT: mfvsrwz r0, f10
-; CHECK-BE-NEXT: mfvsrwz r30, f11
-; CHECK-BE-NEXT: mfvsrwz r29, f12
-; CHECK-BE-NEXT: mfvsrwz r28, f13
-; CHECK-BE-NEXT: mfvsrwz r27, v2
-; CHECK-BE-NEXT: mfvsrwz r26, v3
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r11, r11, 56
-; CHECK-BE-NEXT: sldi r12, r12, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
-; CHECK-BE-NEXT: sldi r0, r0, 56
-; CHECK-BE-NEXT: sldi r30, r30, 56
-; CHECK-BE-NEXT: sldi r29, r29, 56
-; CHECK-BE-NEXT: sldi r28, r28, 56
-; CHECK-BE-NEXT: sldi r27, r27, 56
-; CHECK-BE-NEXT: sldi r26, r26, 56
-; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v10, r11
-; CHECK-BE-NEXT: mtvsrd v14, r12
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: mtvsrd v7, r10
-; CHECK-BE-NEXT: mtvsrd v8, r0
-; CHECK-BE-NEXT: mtvsrd v9, r30
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v12, r28
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v13, r27
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v15, r26
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghb v4, v5, v4
-; CHECK-BE-NEXT: vmrghb v2, v2, v0
-; CHECK-BE-NEXT: vmrghb v5, v6, v1
-; CHECK-BE-NEXT: vmrghb v3, v3, v7
-; CHECK-BE-NEXT: vmrghb v0, v9, v8
-; CHECK-BE-NEXT: vmrghb v1, v10, v11
-; CHECK-BE-NEXT: vmrghb v6, v13, v12
-; CHECK-BE-NEXT: vmrghb v7, v14, v15
-; CHECK-BE-NEXT: vmrghh v2, v2, v4
-; CHECK-BE-NEXT: vmrghh v3, v3, v5
-; CHECK-BE-NEXT: vmrghh v4, v1, v0
-; CHECK-BE-NEXT: vmrghh v5, v7, v6
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs1
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghb v4, v5, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v0, r3
+; CHECK-BE-NEXT: vmrghb v5, v5, v0
+; CHECK-BE-NEXT: vmrghh v4, v5, v4
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: addi r3, r1, -2
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxswapd v2, vs1
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: vmrglb v2, v3, v2
; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT: addi r3, r1, -2
; CHECK-P9-NEXT: stxsihx v2, 0, r3
; CHECK-P9-NEXT: lhz r3, -2(r1)
; CHECK-P9-NEXT: blr
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
-; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f1
; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: addi r3, r1, -2
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
; CHECK-BE-NEXT: vmrghb v2, v2, v3
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: stxsihx v2, 0, r3
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT: xxswapd vs1, v2
-; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: mfvsrwz r5, f3
; CHECK-P9-NEXT: mfvsrwz r3, f0
-; CHECK-P9-NEXT: mfvsrwz r4, f1
-; CHECK-P9-NEXT: mfvsrwz r6, f2
-; CHECK-P9-NEXT: mtvsrd f2, r5
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, v2
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, v2
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: vmrglb v2, v3, v2
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
-; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: vmrglb v2, v4, v2
+; CHECK-P9-NEXT: vmrglh v2, v2, v3
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT: xxswapd vs1, v2
-; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT: xscvspdpn f3, v2
; CHECK-BE-NEXT: xscvspdpn f0, vs0
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: mfvsrwz r5, f3
-; CHECK-BE-NEXT: sldi r5, r5, 56
; CHECK-BE-NEXT: mfvsrwz r3, f0
-; CHECK-BE-NEXT: mfvsrwz r4, f1
-; CHECK-BE-NEXT: mfvsrwz r6, f2
-; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: xxswapd vs0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xscvspdpn f0, v2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: vmrghb v2, v3, v2
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: vmrghb v2, v4, v2
+; CHECK-BE-NEXT: vmrghh v2, v2, v3
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs3, vs1
-; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd vs6, vs0
-; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f1, vs1
-; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: mfvsrwz r5, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: mfvsrwz r3, f2
-; CHECK-P9-NEXT: mfvsrwz r4, f3
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: xxswapd vs2, vs1
+; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: xscvspdpn f2, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: vmrglb v2, v3, v2
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
-; CHECK-P9-NEXT: vmrglb v4, v1, v0
-; CHECK-P9-NEXT: vmrglb v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v5, v4
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd vs1, vs0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvspdpn f1, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs3, vs1
-; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd vs6, vs0
-; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r5, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: mfvsrwz r3, f2
-; CHECK-BE-NEXT: mfvsrwz r4, f3
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v6, r9
+; CHECK-BE-NEXT: xxswapd vs2, vs1
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
; CHECK-BE-NEXT: vmrghb v2, v3, v2
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
-; CHECK-BE-NEXT: vmrghb v4, v1, v0
-; CHECK-BE-NEXT: vmrghb v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 16(r3)
-; CHECK-P9-NEXT: lxv vs3, 0(r3)
-; CHECK-P9-NEXT: lxv vs0, 48(r3)
-; CHECK-P9-NEXT: lxv vs1, 32(r3)
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT: xxswapd vs5, vs3
-; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT: xxswapd vs8, vs2
-; CHECK-P9-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT: xxswapd vs11, vs1
-; CHECK-P9-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT: xscvspdpn f3, vs3
-; CHECK-P9-NEXT: xscvspdpn f2, vs2
+; CHECK-P9-NEXT: lxv vs0, 0(r3)
+; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3
; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r4, f1
+; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: xxswapd v2, vs1
+; CHECK-P9-NEXT: xxswapd vs1, vs0
+; CHECK-P9-NEXT: xscvspdpn f1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: lxv vs2, 48(r3)
+; CHECK-P9-NEXT: lxv vs3, 32(r3)
+; CHECK-P9-NEXT: lxv vs4, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xscvspdpn f1, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1
; CHECK-P9-NEXT: xscvspdpn f0, vs0
-; CHECK-P9-NEXT: xscvspdpn f4, vs4
-; CHECK-P9-NEXT: xscvspdpn f5, vs5
-; CHECK-P9-NEXT: xscvspdpn f6, vs6
-; CHECK-P9-NEXT: xscvspdpn f7, vs7
-; CHECK-P9-NEXT: xscvspdpn f8, vs8
-; CHECK-P9-NEXT: xscvspdpn f9, vs9
-; CHECK-P9-NEXT: xscvspdpn f10, vs10
-; CHECK-P9-NEXT: xscvspdpn f11, vs11
-; CHECK-P9-NEXT: xscvspdpn f12, vs12
-; CHECK-P9-NEXT: xscvspdpn f13, vs13
-; CHECK-P9-NEXT: xscvspdpn v2, v2
-; CHECK-P9-NEXT: xscvspdpn v3, v3
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
; CHECK-P9-NEXT: xscvdpsxws f1, f1
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
-; CHECK-P9-NEXT: xscvdpsxws f9, f9
-; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r3, f3
-; CHECK-P9-NEXT: mfvsrwz r4, f2
-; CHECK-P9-NEXT: mfvsrwz r11, f1
-; CHECK-P9-NEXT: mfvsrwz r12, f0
-; CHECK-P9-NEXT: mfvsrwz r5, f4
-; CHECK-P9-NEXT: mfvsrwz r6, f5
-; CHECK-P9-NEXT: mfvsrwz r7, f6
-; CHECK-P9-NEXT: mfvsrwz r8, f7
-; CHECK-P9-NEXT: mfvsrwz r9, f8
-; CHECK-P9-NEXT: mfvsrwz r10, f9
-; CHECK-P9-NEXT: mfvsrwz r0, f10
-; CHECK-P9-NEXT: mfvsrwz r30, f11
-; CHECK-P9-NEXT: mfvsrwz r29, f12
-; CHECK-P9-NEXT: mfvsrwz r28, f13
-; CHECK-P9-NEXT: mfvsrwz r27, v2
-; CHECK-P9-NEXT: mfvsrwz r26, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f8, r11
-; CHECK-P9-NEXT: mtvsrd f9, r12
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: mtvsrd f10, r0
-; CHECK-P9-NEXT: mtvsrd f11, r30
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f12, r29
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd f13, r28
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v2, r27
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: mtvsrd v3, r26
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: xxswapd v5, vs2
-; CHECK-P9-NEXT: xxswapd v0, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs6
-; CHECK-P9-NEXT: xxswapd v8, vs1
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs10
-; CHECK-P9-NEXT: xxswapd v11, vs11
-; CHECK-P9-NEXT: xxswapd v12, vs8
-; CHECK-P9-NEXT: xxswapd v13, vs12
-; CHECK-P9-NEXT: xxswapd v14, vs13
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs9
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglb v5, v0, v5
-; CHECK-P9-NEXT: vmrglb v4, v4, v1
-; CHECK-P9-NEXT: vmrglb v0, v7, v6
-; CHECK-P9-NEXT: vmrglb v1, v8, v9
-; CHECK-P9-NEXT: vmrglb v6, v11, v10
-; CHECK-P9-NEXT: vmrglb v7, v12, v13
-; CHECK-P9-NEXT: vmrglb v2, v2, v14
-; CHECK-P9-NEXT: vmrglb v3, v15, v3
-; CHECK-P9-NEXT: vmrglh v4, v4, v5
-; CHECK-P9-NEXT: vmrglh v5, v1, v0
-; CHECK-P9-NEXT: vmrglh v0, v7, v6
+; CHECK-P9-NEXT: xxsldwi vs0, vs4, vs4, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
-; CHECK-P9-NEXT: vmrglw v2, v2, v0
-; CHECK-P9-NEXT: xxmrgld v2, v2, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs4
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs4
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs4, vs4, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs3, vs3, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs3
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v4, v3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs3, vs3, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs2, vs2, 3
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: xxswapd vs0, vs2
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xscvspdpn f0, vs2
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v4, v5, v4
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: xxsldwi vs0, vs2, vs2, 1
+; CHECK-P9-NEXT: xscvspdpn f0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v0, vs0
+; CHECK-P9-NEXT: vmrglb v5, v5, v0
+; CHECK-P9-NEXT: vmrglh v4, v5, v4
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
+; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT: xscvspdpn f4, vs4
+; CHECK-BE-NEXT: xscvdpsxws f4, f4
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT: xxswapd vs5, vs3
-; CHECK-BE-NEXT: xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT: xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT: xxswapd vs8, vs2
-; CHECK-BE-NEXT: xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT: xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT: xxswapd vs11, vs1
-; CHECK-BE-NEXT: xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT: xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT: xxswapd v2, vs0
-; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT: xscvspdpn f3, vs3
-; CHECK-BE-NEXT: xscvspdpn f2, vs2
-; CHECK-BE-NEXT: xscvspdpn f1, vs1
-; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: xxswapd vs4, vs3
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvspdpn f4, vs4
-; CHECK-BE-NEXT: xscvspdpn f5, vs5
-; CHECK-BE-NEXT: xscvspdpn f6, vs6
-; CHECK-BE-NEXT: xscvspdpn f7, vs7
-; CHECK-BE-NEXT: xscvspdpn f8, vs8
-; CHECK-BE-NEXT: xscvspdpn f9, vs9
-; CHECK-BE-NEXT: xscvspdpn f10, vs10
-; CHECK-BE-NEXT: xscvspdpn f11, vs11
-; CHECK-BE-NEXT: xscvspdpn f12, vs12
-; CHECK-BE-NEXT: xscvspdpn f13, vs13
-; CHECK-BE-NEXT: xscvspdpn v2, v2
-; CHECK-BE-NEXT: xscvspdpn v3, v3
+; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: xscvspdpn f4, vs3
+; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: vmrghb v2, v3, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs2
+; CHECK-BE-NEXT: xscvspdpn f3, vs3
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvspdpn f3, vs2
+; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: mtvsrd v4, r3
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
; CHECK-BE-NEXT: mfvsrwz r3, f3
-; CHECK-BE-NEXT: mfvsrwz r4, f2
-; CHECK-BE-NEXT: mfvsrwz r11, f1
-; CHECK-BE-NEXT: mfvsrwz r12, f0
-; CHECK-BE-NEXT: mfvsrwz r5, f4
-; CHECK-BE-NEXT: mfvsrwz r6, f5
-; CHECK-BE-NEXT: mfvsrwz r7, f6
-; CHECK-BE-NEXT: mfvsrwz r8, f7
-; CHECK-BE-NEXT: mfvsrwz r9, f8
-; CHECK-BE-NEXT: mfvsrwz r10, f9
-; CHECK-BE-NEXT: mfvsrwz r0, f10
-; CHECK-BE-NEXT: mfvsrwz r30, f11
-; CHECK-BE-NEXT: mfvsrwz r29, f12
-; CHECK-BE-NEXT: mfvsrwz r28, f13
-; CHECK-BE-NEXT: mfvsrwz r27, v2
-; CHECK-BE-NEXT: mfvsrwz r26, v3
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r11, r11, 56
-; CHECK-BE-NEXT: sldi r12, r12, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
-; CHECK-BE-NEXT: sldi r0, r0, 56
-; CHECK-BE-NEXT: sldi r30, r30, 56
-; CHECK-BE-NEXT: sldi r29, r29, 56
-; CHECK-BE-NEXT: sldi r28, r28, 56
-; CHECK-BE-NEXT: sldi r27, r27, 56
-; CHECK-BE-NEXT: sldi r26, r26, 56
-; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v10, r11
-; CHECK-BE-NEXT: mtvsrd v14, r12
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: mtvsrd v7, r10
-; CHECK-BE-NEXT: mtvsrd v8, r0
-; CHECK-BE-NEXT: mtvsrd v9, r30
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v12, r28
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v13, r27
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v15, r26
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghb v4, v5, v4
-; CHECK-BE-NEXT: vmrghb v2, v2, v0
-; CHECK-BE-NEXT: vmrghb v5, v6, v1
-; CHECK-BE-NEXT: vmrghb v3, v3, v7
-; CHECK-BE-NEXT: vmrghb v0, v9, v8
-; CHECK-BE-NEXT: vmrghb v1, v10, v11
-; CHECK-BE-NEXT: vmrghb v6, v13, v12
-; CHECK-BE-NEXT: vmrghb v7, v14, v15
-; CHECK-BE-NEXT: vmrghh v2, v2, v4
-; CHECK-BE-NEXT: vmrghh v3, v3, v5
-; CHECK-BE-NEXT: vmrghh v4, v1, v0
-; CHECK-BE-NEXT: vmrghh v5, v7, v6
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs1
+; CHECK-BE-NEXT: xscvspdpn f2, vs2
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvspdpn f2, vs1
+; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghb v3, v4, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs0
+; CHECK-BE-NEXT: xscvspdpn f1, vs1
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvspdpn f1, vs0
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: xscvspdpn f0, vs0
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: vmrghb v4, v5, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v0, r3
+; CHECK-BE-NEXT: vmrghb v5, v5, v0
+; CHECK-BE-NEXT: vmrghh v4, v5, v4
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xscvdpsxws f0, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xscvdpsxws f1, v2
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: mtvsrd f1, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpsxws f0, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: xscvdpsxws f1, v2
+; CHECK-BE-NEXT: sldi r3, r3, 48
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f0
; CHECK-BE-NEXT: mtvsrd v2, r3
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs2, vs1
-; CHECK-P9-NEXT: xxswapd vs3, vs0
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r5, f0
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mfvsrwz r4, f2
-; CHECK-P9-NEXT: mfvsrwz r6, f3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
; CHECK-P9-NEXT: vmrglh v2, v2, v3
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: xxswapd vs2, vs1
-; CHECK-BE-NEXT: xxswapd vs3, vs0
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: mfvsrwz r3, f1
-; CHECK-BE-NEXT: mfvsrwz r5, f0
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f2
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f2
-; CHECK-BE-NEXT: mfvsrwz r6, f3
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: vmrghh v2, v2, v3
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lxv vs3, 0(r3)
+; CHECK-P9-NEXT: xscvdpsxws f4, f3
+; CHECK-P9-NEXT: xxswapd vs3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: lxv vs0, 48(r3)
; CHECK-P9-NEXT: lxv vs1, 32(r3)
; CHECK-P9-NEXT: lxv vs2, 16(r3)
-; CHECK-P9-NEXT: lxv vs3, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs4, vs3
-; CHECK-P9-NEXT: xxswapd vs5, vs2
-; CHECK-P9-NEXT: xxswapd vs6, vs1
-; CHECK-P9-NEXT: xxswapd vs7, vs0
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: xxswapd v2, vs4
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: mfvsrwz r3, f3
-; CHECK-P9-NEXT: mfvsrwz r5, f2
-; CHECK-P9-NEXT: mfvsrwz r7, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mfvsrwz r4, f4
-; CHECK-P9-NEXT: mfvsrwz r6, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
; CHECK-P9-NEXT: vmrglh v2, v2, v3
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
-; CHECK-P9-NEXT: vmrglh v4, v0, v1
-; CHECK-P9-NEXT: vmrglh v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
-; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
-; CHECK-BE-NEXT: xxswapd vs4, vs3
-; CHECK-BE-NEXT: xxswapd vs5, vs2
-; CHECK-BE-NEXT: xxswapd vs6, vs1
-; CHECK-BE-NEXT: xxswapd vs7, vs0
+; CHECK-BE-NEXT: xscvdpsxws f4, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r3, f3
-; CHECK-BE-NEXT: mfvsrwz r5, f2
-; CHECK-BE-NEXT: mfvsrwz r7, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: lxv vs1, 16(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f4
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f4
-; CHECK-BE-NEXT: mfvsrwz r6, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: vmrghh v2, v2, v3
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
-; CHECK-BE-NEXT: vmrghh v4, v0, v1
-; CHECK-BE-NEXT: vmrghh v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 48
; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: lxv vs4, 32(r4)
-; CHECK-P9-NEXT: lxv vs5, 16(r4)
-; CHECK-P9-NEXT: lxv vs6, 0(r4)
-; CHECK-P9-NEXT: lxv vs0, 112(r4)
-; CHECK-P9-NEXT: lxv vs1, 96(r4)
-; CHECK-P9-NEXT: lxv vs3, 80(r4)
-; CHECK-P9-NEXT: lxv vs7, 64(r4)
-; CHECK-P9-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxswapd vs8, vs6
-; CHECK-P9-NEXT: xxswapd vs9, vs5
-; CHECK-P9-NEXT: xxswapd vs10, vs4
-; CHECK-P9-NEXT: xxswapd vs11, vs2
-; CHECK-P9-NEXT: xxswapd vs12, vs7
-; CHECK-P9-NEXT: xxswapd vs13, vs3
-; CHECK-P9-NEXT: xxswapd v2, vs1
-; CHECK-P9-NEXT: xxswapd v3, vs0
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
+; CHECK-P9-NEXT: lxv vs4, 0(r4)
+; CHECK-P9-NEXT: lxv vs3, 16(r4)
+; CHECK-P9-NEXT: lxv vs2, 32(r4)
+; CHECK-P9-NEXT: xscvdpsxws f5, f4
+; CHECK-P9-NEXT: lxv vs1, 48(r4)
+; CHECK-P9-NEXT: xscvdpsxws f6, f3
+; CHECK-P9-NEXT: lxv vs0, 64(r4)
+; CHECK-P9-NEXT: xscvdpsxws f7, f2
+; CHECK-P9-NEXT: xscvdpsxws f8, f1
+; CHECK-P9-NEXT: xxswapd vs4, vs4
; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
+; CHECK-P9-NEXT: mfvsrwz r5, f5
+; CHECK-P9-NEXT: xscvdpsxws f9, f0
+; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mtvsrd f5, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f6
+; CHECK-P9-NEXT: xxswapd vs2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mtvsrd f6, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f7
+; CHECK-P9-NEXT: mtvsrd f7, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f8
+; CHECK-P9-NEXT: mtvsrd f8, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f9
+; CHECK-P9-NEXT: mtvsrd f9, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f4
+; CHECK-P9-NEXT: mtvsrd f4, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f3
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: xxswapd v2, vs5
+; CHECK-P9-NEXT: xxswapd v5, vs8
+; CHECK-P9-NEXT: xxswapd v0, vs9
+; CHECK-P9-NEXT: mtvsrd f3, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f2
+; CHECK-P9-NEXT: mtvsrd f2, r5
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
-; CHECK-P9-NEXT: xscvdpsxws f9, f9
-; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r4, f6
-; CHECK-P9-NEXT: mfvsrwz r5, f5
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f2
-; CHECK-P9-NEXT: mfvsrwz r12, f7
-; CHECK-P9-NEXT: mfvsrwz r0, f3
-; CHECK-P9-NEXT: mfvsrwz r30, f1
-; CHECK-P9-NEXT: mfvsrwz r29, f0
-; CHECK-P9-NEXT: mfvsrwz r8, f8
-; CHECK-P9-NEXT: mfvsrwz r9, f9
-; CHECK-P9-NEXT: mfvsrwz r10, f10
-; CHECK-P9-NEXT: mfvsrwz r11, f11
-; CHECK-P9-NEXT: mfvsrwz r28, f12
-; CHECK-P9-NEXT: mfvsrwz r27, f13
-; CHECK-P9-NEXT: mfvsrwz r26, v2
-; CHECK-P9-NEXT: mfvsrwz r25, v3
-; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v1, vs2
+; CHECK-P9-NEXT: lxv vs2, 80(r4)
+; CHECK-P9-NEXT: xxswapd v3, vs4
+; CHECK-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-P9-NEXT: xxswapd v3, vs6
+; CHECK-P9-NEXT: xxswapd v4, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
+; CHECK-P9-NEXT: mfvsrwz r5, f1
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs7
; CHECK-P9-NEXT: mtvsrd f1, r5
-; CHECK-P9-NEXT: mtvsrd f2, r6
-; CHECK-P9-NEXT: mtvsrd f3, r7
-; CHECK-P9-NEXT: mtvsrd f8, r12
-; CHECK-P9-NEXT: mtvsrd f9, r0
-; CHECK-P9-NEXT: mtvsrd f10, r30
-; CHECK-P9-NEXT: mtvsrd f11, r29
-; CHECK-P9-NEXT: mtvsrd f4, r8
-; CHECK-P9-NEXT: mtvsrd f5, r9
-; CHECK-P9-NEXT: mtvsrd f6, r10
-; CHECK-P9-NEXT: mtvsrd f7, r11
-; CHECK-P9-NEXT: mtvsrd f12, r28
-; CHECK-P9-NEXT: mtvsrd f13, r27
-; CHECK-P9-NEXT: mtvsrd v2, r26
-; CHECK-P9-NEXT: mtvsrd v3, r25
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: xxswapd v5, vs1
-; CHECK-P9-NEXT: xxswapd v0, vs2
-; CHECK-P9-NEXT: xxswapd v1, vs3
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v6, vs4
-; CHECK-P9-NEXT: xxswapd v7, vs5
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v8, vs6
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs8
-; CHECK-P9-NEXT: xxswapd v11, vs12
-; CHECK-P9-NEXT: xxswapd v12, vs9
-; CHECK-P9-NEXT: xxswapd v13, vs13
-; CHECK-P9-NEXT: xxswapd v14, vs10
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs11
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglh v4, v4, v6
-; CHECK-P9-NEXT: vmrglh v5, v5, v7
-; CHECK-P9-NEXT: vmrglh v0, v0, v8
-; CHECK-P9-NEXT: vmrglh v1, v1, v9
-; CHECK-P9-NEXT: vmrglh v6, v10, v11
-; CHECK-P9-NEXT: vmrglh v7, v12, v13
-; CHECK-P9-NEXT: vmrglh v2, v14, v2
-; CHECK-P9-NEXT: vmrglh v3, v15, v3
-; CHECK-P9-NEXT: vmrglw v4, v5, v4
-; CHECK-P9-NEXT: vmrglw v5, v1, v0
-; CHECK-P9-NEXT: vmrglw v0, v7, v6
+; CHECK-P9-NEXT: mfvsrwz r5, f0
+; CHECK-P9-NEXT: vmrglh v4, v4, v1
+; CHECK-P9-NEXT: xxswapd v1, vs1
+; CHECK-P9-NEXT: mtvsrd f0, r5
+; CHECK-P9-NEXT: vmrglh v5, v5, v1
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: xxswapd v1, vs0
+; CHECK-P9-NEXT: lxv vs0, 112(r4)
+; CHECK-P9-NEXT: lxv vs1, 96(r4)
+; CHECK-P9-NEXT: mfvsrwz r4, f3
+; CHECK-P9-NEXT: mtvsrd f3, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f2
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT: xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: xxmrgld vs4, v3, v2
+; CHECK-P9-NEXT: xxswapd v2, vs3
+; CHECK-P9-NEXT: vmrglh v0, v0, v1
+; CHECK-P9-NEXT: mtvsrd f2, r4
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r4, f2
+; CHECK-P9-NEXT: mtvsrd f2, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f1
+; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f1
+; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: vmrglw v2, v2, v0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: stxv vs4, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 16(r4)
-; CHECK-BE-NEXT: lxv vs5, 32(r4)
-; CHECK-BE-NEXT: lxv vs6, 48(r4)
-; CHECK-BE-NEXT: lxv vs0, 64(r4)
-; CHECK-BE-NEXT: lxv vs1, 80(r4)
-; CHECK-BE-NEXT: lxv vs3, 96(r4)
-; CHECK-BE-NEXT: lxv vs7, 112(r4)
-; CHECK-BE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxswapd vs8, vs6
-; CHECK-BE-NEXT: xxswapd vs9, vs5
-; CHECK-BE-NEXT: xxswapd vs10, vs4
-; CHECK-BE-NEXT: xxswapd vs11, vs2
-; CHECK-BE-NEXT: xxswapd vs12, vs7
-; CHECK-BE-NEXT: xxswapd vs13, vs3
-; CHECK-BE-NEXT: xxswapd v2, vs1
-; CHECK-BE-NEXT: xxswapd v3, vs0
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: xscvdpsxws f5, f4
+; CHECK-BE-NEXT: xxswapd vs4, vs4
+; CHECK-BE-NEXT: lxv vs3, 32(r4)
+; CHECK-BE-NEXT: xscvdpsxws f6, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
+; CHECK-BE-NEXT: mfvsrwz r5, f5
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: xscvdpsxws f7, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mtvsrd v2, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f4
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: lxv vs1, 0(r4)
+; CHECK-BE-NEXT: xscvdpsxws f4, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
-; CHECK-BE-NEXT: mfvsrwz r4, f6
-; CHECK-BE-NEXT: mfvsrwz r5, f5
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f2
-; CHECK-BE-NEXT: mfvsrwz r12, f7
-; CHECK-BE-NEXT: mfvsrwz r0, f3
-; CHECK-BE-NEXT: mfvsrwz r30, f1
-; CHECK-BE-NEXT: mfvsrwz r29, f0
-; CHECK-BE-NEXT: mfvsrwz r8, f8
-; CHECK-BE-NEXT: mfvsrwz r9, f9
-; CHECK-BE-NEXT: mfvsrwz r10, f10
-; CHECK-BE-NEXT: mfvsrwz r11, f11
-; CHECK-BE-NEXT: mfvsrwz r28, f12
-; CHECK-BE-NEXT: mfvsrwz r27, f13
-; CHECK-BE-NEXT: mfvsrwz r26, v2
-; CHECK-BE-NEXT: mfvsrwz r25, v3
-; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v3, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f6
; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r12, r12, 48
-; CHECK-BE-NEXT: sldi r0, r0, 48
-; CHECK-BE-NEXT: sldi r30, r30, 48
-; CHECK-BE-NEXT: sldi r29, r29, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
-; CHECK-BE-NEXT: sldi r11, r11, 48
-; CHECK-BE-NEXT: sldi r28, r28, 48
-; CHECK-BE-NEXT: sldi r27, r27, 48
-; CHECK-BE-NEXT: sldi r26, r26, 48
-; CHECK-BE-NEXT: sldi r25, r25, 48
-; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: lxv vs0, 112(r4)
+; CHECK-BE-NEXT: vmrghh v2, v2, v3
; CHECK-BE-NEXT: mtvsrd v3, r5
-; CHECK-BE-NEXT: mtvsrd v4, r6
-; CHECK-BE-NEXT: mtvsrd v5, r7
-; CHECK-BE-NEXT: mtvsrd v8, r12
-; CHECK-BE-NEXT: mtvsrd v10, r0
-; CHECK-BE-NEXT: mtvsrd v12, r30
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v0, r8
-; CHECK-BE-NEXT: mtvsrd v1, r9
-; CHECK-BE-NEXT: mtvsrd v6, r10
-; CHECK-BE-NEXT: mtvsrd v7, r11
-; CHECK-BE-NEXT: mtvsrd v9, r28
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r27
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v13, r26
-; CHECK-BE-NEXT: mtvsrd v14, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v15, r25
-; CHECK-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghh v2, v2, v0
-; CHECK-BE-NEXT: vmrghh v3, v3, v1
-; CHECK-BE-NEXT: vmrghh v4, v4, v6
-; CHECK-BE-NEXT: vmrghh v5, v5, v7
-; CHECK-BE-NEXT: vmrghh v0, v8, v9
-; CHECK-BE-NEXT: vmrghh v1, v10, v11
-; CHECK-BE-NEXT: vmrghh v6, v12, v13
-; CHECK-BE-NEXT: vmrghh v7, v14, v15
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f7
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f4
; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v5, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v0, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f2
+; CHECK-BE-NEXT: lxv vs2, 96(r4)
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v1, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: lxv vs1, 80(r4)
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: vmrghh v4, v4, v1
+; CHECK-BE-NEXT: mtvsrd v1, r5
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: vmrghh v5, v5, v1
+; CHECK-BE-NEXT: mfvsrwz r5, f0
+; CHECK-BE-NEXT: lxv vs0, 64(r4)
+; CHECK-BE-NEXT: mfvsrwz r4, f3
+; CHECK-BE-NEXT: sldi r4, r4, 48
; CHECK-BE-NEXT: vmrghw v3, v5, v4
-; CHECK-BE-NEXT: vmrghw v4, v1, v0
-; CHECK-BE-NEXT: vmrghw v5, v7, v6
+; CHECK-BE-NEXT: xxmrghd vs3, v3, v2
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-NEXT: mfvsrwz r4, f2
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f0
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v1, r5
+; CHECK-BE-NEXT: vmrghh v0, v0, v1
+; CHECK-BE-NEXT: vmrghw v2, v2, v0
+; CHECK-BE-NEXT: stxv vs3, 0(r3)
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v5, r4
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT: xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xscvdpsxws f0, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xscvdpsxws f1, v2
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: mtvsrd f1, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-P9-NEXT: vmrglh v2, v3, v2
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpsxws f0, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: xscvdpsxws f1, v2
+; CHECK-BE-NEXT: sldi r3, r3, 48
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f0
; CHECK-BE-NEXT: mtvsrd v2, r3
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs2, vs1
-; CHECK-P9-NEXT: xxswapd vs3, vs0
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r5, f0
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mfvsrwz r4, f2
-; CHECK-P9-NEXT: mfvsrwz r6, f3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
; CHECK-P9-NEXT: vmrglh v2, v2, v3
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: xxswapd vs2, vs1
-; CHECK-BE-NEXT: xxswapd vs3, vs0
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: mfvsrwz r3, f1
-; CHECK-BE-NEXT: mfvsrwz r5, f0
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f2
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f2
-; CHECK-BE-NEXT: mfvsrwz r6, f3
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: vmrghh v2, v2, v3
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lxv vs3, 0(r3)
+; CHECK-P9-NEXT: xscvdpsxws f4, f3
+; CHECK-P9-NEXT: xxswapd vs3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: lxv vs0, 48(r3)
; CHECK-P9-NEXT: lxv vs1, 32(r3)
; CHECK-P9-NEXT: lxv vs2, 16(r3)
-; CHECK-P9-NEXT: lxv vs3, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs4, vs3
-; CHECK-P9-NEXT: xxswapd vs5, vs2
-; CHECK-P9-NEXT: xxswapd vs6, vs1
-; CHECK-P9-NEXT: xxswapd vs7, vs0
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: xxswapd v2, vs4
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: mfvsrwz r3, f3
-; CHECK-P9-NEXT: mfvsrwz r5, f2
-; CHECK-P9-NEXT: mfvsrwz r7, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mfvsrwz r4, f4
-; CHECK-P9-NEXT: mfvsrwz r6, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
; CHECK-P9-NEXT: vmrglh v2, v2, v3
-; CHECK-P9-NEXT: vmrglh v3, v4, v5
-; CHECK-P9-NEXT: vmrglh v4, v0, v1
-; CHECK-P9-NEXT: vmrglh v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
-; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
-; CHECK-BE-NEXT: xxswapd vs4, vs3
-; CHECK-BE-NEXT: xxswapd vs5, vs2
-; CHECK-BE-NEXT: xxswapd vs6, vs1
-; CHECK-BE-NEXT: xxswapd vs7, vs0
+; CHECK-BE-NEXT: xscvdpsxws f4, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r3, f3
-; CHECK-BE-NEXT: mfvsrwz r5, f2
-; CHECK-BE-NEXT: mfvsrwz r7, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: lxv vs1, 16(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f4
; CHECK-BE-NEXT: sldi r3, r3, 48
-; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
-; CHECK-BE-NEXT: mfvsrwz r4, f4
-; CHECK-BE-NEXT: mfvsrwz r6, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: sldi r4, r4, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: vmrghh v2, v2, v3
-; CHECK-BE-NEXT: vmrghh v3, v4, v5
-; CHECK-BE-NEXT: vmrghh v4, v0, v1
-; CHECK-BE-NEXT: vmrghh v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 48
; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 48
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: lxv vs4, 32(r4)
-; CHECK-P9-NEXT: lxv vs5, 16(r4)
-; CHECK-P9-NEXT: lxv vs6, 0(r4)
-; CHECK-P9-NEXT: lxv vs0, 112(r4)
-; CHECK-P9-NEXT: lxv vs1, 96(r4)
-; CHECK-P9-NEXT: lxv vs3, 80(r4)
-; CHECK-P9-NEXT: lxv vs7, 64(r4)
-; CHECK-P9-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxswapd vs8, vs6
-; CHECK-P9-NEXT: xxswapd vs9, vs5
-; CHECK-P9-NEXT: xxswapd vs10, vs4
-; CHECK-P9-NEXT: xxswapd vs11, vs2
-; CHECK-P9-NEXT: xxswapd vs12, vs7
-; CHECK-P9-NEXT: xxswapd vs13, vs3
-; CHECK-P9-NEXT: xxswapd v2, vs1
-; CHECK-P9-NEXT: xxswapd v3, vs0
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
+; CHECK-P9-NEXT: lxv vs4, 0(r4)
+; CHECK-P9-NEXT: lxv vs3, 16(r4)
+; CHECK-P9-NEXT: lxv vs2, 32(r4)
+; CHECK-P9-NEXT: xscvdpsxws f5, f4
+; CHECK-P9-NEXT: lxv vs1, 48(r4)
+; CHECK-P9-NEXT: xscvdpsxws f6, f3
+; CHECK-P9-NEXT: lxv vs0, 64(r4)
+; CHECK-P9-NEXT: xscvdpsxws f7, f2
+; CHECK-P9-NEXT: xscvdpsxws f8, f1
+; CHECK-P9-NEXT: xxswapd vs4, vs4
; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
+; CHECK-P9-NEXT: mfvsrwz r5, f5
+; CHECK-P9-NEXT: xscvdpsxws f9, f0
+; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mtvsrd f5, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f6
+; CHECK-P9-NEXT: xxswapd vs2, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: mtvsrd f6, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f7
+; CHECK-P9-NEXT: mtvsrd f7, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f8
+; CHECK-P9-NEXT: mtvsrd f8, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f9
+; CHECK-P9-NEXT: mtvsrd f9, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f4
+; CHECK-P9-NEXT: mtvsrd f4, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f3
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: xxswapd v2, vs5
+; CHECK-P9-NEXT: xxswapd v5, vs8
+; CHECK-P9-NEXT: xxswapd v0, vs9
+; CHECK-P9-NEXT: mtvsrd f3, r5
+; CHECK-P9-NEXT: mfvsrwz r5, f2
+; CHECK-P9-NEXT: mtvsrd f2, r5
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
-; CHECK-P9-NEXT: xscvdpsxws f9, f9
-; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r4, f6
-; CHECK-P9-NEXT: mfvsrwz r5, f5
-; CHECK-P9-NEXT: mfvsrwz r6, f4
-; CHECK-P9-NEXT: mfvsrwz r7, f2
-; CHECK-P9-NEXT: mfvsrwz r12, f7
-; CHECK-P9-NEXT: mfvsrwz r0, f3
-; CHECK-P9-NEXT: mfvsrwz r30, f1
-; CHECK-P9-NEXT: mfvsrwz r29, f0
-; CHECK-P9-NEXT: mfvsrwz r8, f8
-; CHECK-P9-NEXT: mfvsrwz r9, f9
-; CHECK-P9-NEXT: mfvsrwz r10, f10
-; CHECK-P9-NEXT: mfvsrwz r11, f11
-; CHECK-P9-NEXT: mfvsrwz r28, f12
-; CHECK-P9-NEXT: mfvsrwz r27, f13
-; CHECK-P9-NEXT: mfvsrwz r26, v2
-; CHECK-P9-NEXT: mfvsrwz r25, v3
-; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v1, vs2
+; CHECK-P9-NEXT: lxv vs2, 80(r4)
+; CHECK-P9-NEXT: xxswapd v3, vs4
+; CHECK-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-P9-NEXT: xxswapd v3, vs6
+; CHECK-P9-NEXT: xxswapd v4, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
+; CHECK-P9-NEXT: mfvsrwz r5, f1
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs7
; CHECK-P9-NEXT: mtvsrd f1, r5
-; CHECK-P9-NEXT: mtvsrd f2, r6
-; CHECK-P9-NEXT: mtvsrd f3, r7
-; CHECK-P9-NEXT: mtvsrd f8, r12
-; CHECK-P9-NEXT: mtvsrd f9, r0
-; CHECK-P9-NEXT: mtvsrd f10, r30
-; CHECK-P9-NEXT: mtvsrd f11, r29
-; CHECK-P9-NEXT: mtvsrd f4, r8
-; CHECK-P9-NEXT: mtvsrd f5, r9
-; CHECK-P9-NEXT: mtvsrd f6, r10
-; CHECK-P9-NEXT: mtvsrd f7, r11
-; CHECK-P9-NEXT: mtvsrd f12, r28
-; CHECK-P9-NEXT: mtvsrd f13, r27
-; CHECK-P9-NEXT: mtvsrd v2, r26
-; CHECK-P9-NEXT: mtvsrd v3, r25
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: xxswapd v5, vs1
-; CHECK-P9-NEXT: xxswapd v0, vs2
-; CHECK-P9-NEXT: xxswapd v1, vs3
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v6, vs4
-; CHECK-P9-NEXT: xxswapd v7, vs5
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v8, vs6
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs8
-; CHECK-P9-NEXT: xxswapd v11, vs12
-; CHECK-P9-NEXT: xxswapd v12, vs9
-; CHECK-P9-NEXT: xxswapd v13, vs13
-; CHECK-P9-NEXT: xxswapd v14, vs10
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs11
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglh v4, v4, v6
-; CHECK-P9-NEXT: vmrglh v5, v5, v7
-; CHECK-P9-NEXT: vmrglh v0, v0, v8
-; CHECK-P9-NEXT: vmrglh v1, v1, v9
-; CHECK-P9-NEXT: vmrglh v6, v10, v11
-; CHECK-P9-NEXT: vmrglh v7, v12, v13
-; CHECK-P9-NEXT: vmrglh v2, v14, v2
-; CHECK-P9-NEXT: vmrglh v3, v15, v3
-; CHECK-P9-NEXT: vmrglw v4, v5, v4
-; CHECK-P9-NEXT: vmrglw v5, v1, v0
-; CHECK-P9-NEXT: vmrglw v0, v7, v6
+; CHECK-P9-NEXT: mfvsrwz r5, f0
+; CHECK-P9-NEXT: vmrglh v4, v4, v1
+; CHECK-P9-NEXT: xxswapd v1, vs1
+; CHECK-P9-NEXT: mtvsrd f0, r5
+; CHECK-P9-NEXT: vmrglh v5, v5, v1
+; CHECK-P9-NEXT: xscvdpsxws f2, f2
+; CHECK-P9-NEXT: xxswapd v1, vs0
+; CHECK-P9-NEXT: lxv vs0, 112(r4)
+; CHECK-P9-NEXT: lxv vs1, 96(r4)
+; CHECK-P9-NEXT: mfvsrwz r4, f3
+; CHECK-P9-NEXT: mtvsrd f3, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f2
; CHECK-P9-NEXT: vmrglw v2, v3, v2
-; CHECK-P9-NEXT: xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT: xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: vmrglw v3, v5, v4
+; CHECK-P9-NEXT: xxmrgld vs4, v3, v2
+; CHECK-P9-NEXT: xxswapd v2, vs3
+; CHECK-P9-NEXT: vmrglh v0, v0, v1
+; CHECK-P9-NEXT: mtvsrd f2, r4
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r4, f2
+; CHECK-P9-NEXT: mtvsrd f2, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f1
+; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r4, f1
+; CHECK-P9-NEXT: mtvsrd f1, r4
+; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: vmrglh v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: vmrglw v2, v2, v0
+; CHECK-P9-NEXT: mtvsrd f0, r4
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglh v4, v4, v5
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: stxv vs4, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 16(r4)
-; CHECK-BE-NEXT: lxv vs5, 32(r4)
-; CHECK-BE-NEXT: lxv vs6, 48(r4)
-; CHECK-BE-NEXT: lxv vs0, 64(r4)
-; CHECK-BE-NEXT: lxv vs1, 80(r4)
-; CHECK-BE-NEXT: lxv vs3, 96(r4)
-; CHECK-BE-NEXT: lxv vs7, 112(r4)
-; CHECK-BE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxswapd vs8, vs6
-; CHECK-BE-NEXT: xxswapd vs9, vs5
-; CHECK-BE-NEXT: xxswapd vs10, vs4
-; CHECK-BE-NEXT: xxswapd vs11, vs2
-; CHECK-BE-NEXT: xxswapd vs12, vs7
-; CHECK-BE-NEXT: xxswapd vs13, vs3
-; CHECK-BE-NEXT: xxswapd v2, vs1
-; CHECK-BE-NEXT: xxswapd v3, vs0
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: xscvdpsxws f5, f4
+; CHECK-BE-NEXT: xxswapd vs4, vs4
+; CHECK-BE-NEXT: lxv vs3, 32(r4)
+; CHECK-BE-NEXT: xscvdpsxws f6, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
+; CHECK-BE-NEXT: mfvsrwz r5, f5
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: xscvdpsxws f7, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mtvsrd v2, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f4
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: lxv vs1, 0(r4)
+; CHECK-BE-NEXT: xscvdpsxws f4, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
-; CHECK-BE-NEXT: mfvsrwz r4, f6
-; CHECK-BE-NEXT: mfvsrwz r5, f5
-; CHECK-BE-NEXT: mfvsrwz r6, f4
-; CHECK-BE-NEXT: mfvsrwz r7, f2
-; CHECK-BE-NEXT: mfvsrwz r12, f7
-; CHECK-BE-NEXT: mfvsrwz r0, f3
-; CHECK-BE-NEXT: mfvsrwz r30, f1
-; CHECK-BE-NEXT: mfvsrwz r29, f0
-; CHECK-BE-NEXT: mfvsrwz r8, f8
-; CHECK-BE-NEXT: mfvsrwz r9, f9
-; CHECK-BE-NEXT: mfvsrwz r10, f10
-; CHECK-BE-NEXT: mfvsrwz r11, f11
-; CHECK-BE-NEXT: mfvsrwz r28, f12
-; CHECK-BE-NEXT: mfvsrwz r27, f13
-; CHECK-BE-NEXT: mfvsrwz r26, v2
-; CHECK-BE-NEXT: mfvsrwz r25, v3
-; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v3, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f6
; CHECK-BE-NEXT: sldi r5, r5, 48
-; CHECK-BE-NEXT: sldi r6, r6, 48
-; CHECK-BE-NEXT: sldi r7, r7, 48
-; CHECK-BE-NEXT: sldi r12, r12, 48
-; CHECK-BE-NEXT: sldi r0, r0, 48
-; CHECK-BE-NEXT: sldi r30, r30, 48
-; CHECK-BE-NEXT: sldi r29, r29, 48
-; CHECK-BE-NEXT: sldi r8, r8, 48
-; CHECK-BE-NEXT: sldi r9, r9, 48
-; CHECK-BE-NEXT: sldi r10, r10, 48
-; CHECK-BE-NEXT: sldi r11, r11, 48
-; CHECK-BE-NEXT: sldi r28, r28, 48
-; CHECK-BE-NEXT: sldi r27, r27, 48
-; CHECK-BE-NEXT: sldi r26, r26, 48
-; CHECK-BE-NEXT: sldi r25, r25, 48
-; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: lxv vs0, 112(r4)
+; CHECK-BE-NEXT: vmrghh v2, v2, v3
; CHECK-BE-NEXT: mtvsrd v3, r5
-; CHECK-BE-NEXT: mtvsrd v4, r6
-; CHECK-BE-NEXT: mtvsrd v5, r7
-; CHECK-BE-NEXT: mtvsrd v8, r12
-; CHECK-BE-NEXT: mtvsrd v10, r0
-; CHECK-BE-NEXT: mtvsrd v12, r30
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v0, r8
-; CHECK-BE-NEXT: mtvsrd v1, r9
-; CHECK-BE-NEXT: mtvsrd v6, r10
-; CHECK-BE-NEXT: mtvsrd v7, r11
-; CHECK-BE-NEXT: mtvsrd v9, r28
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r27
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v13, r26
-; CHECK-BE-NEXT: mtvsrd v14, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v15, r25
-; CHECK-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghh v2, v2, v0
-; CHECK-BE-NEXT: vmrghh v3, v3, v1
-; CHECK-BE-NEXT: vmrghh v4, v4, v6
-; CHECK-BE-NEXT: vmrghh v5, v5, v7
-; CHECK-BE-NEXT: vmrghh v0, v8, v9
-; CHECK-BE-NEXT: vmrghh v1, v10, v11
-; CHECK-BE-NEXT: vmrghh v6, v12, v13
-; CHECK-BE-NEXT: vmrghh v7, v14, v15
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f7
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f4
; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v5, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f3
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v0, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f2
+; CHECK-BE-NEXT: lxv vs2, 96(r4)
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v1, r5
+; CHECK-BE-NEXT: mfvsrwz r5, f1
+; CHECK-BE-NEXT: lxv vs1, 80(r4)
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: vmrghh v4, v4, v1
+; CHECK-BE-NEXT: mtvsrd v1, r5
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: vmrghh v5, v5, v1
+; CHECK-BE-NEXT: mfvsrwz r5, f0
+; CHECK-BE-NEXT: lxv vs0, 64(r4)
+; CHECK-BE-NEXT: mfvsrwz r4, f3
+; CHECK-BE-NEXT: sldi r4, r4, 48
; CHECK-BE-NEXT: vmrghw v3, v5, v4
-; CHECK-BE-NEXT: vmrghw v4, v1, v0
-; CHECK-BE-NEXT: vmrghw v5, v7, v6
+; CHECK-BE-NEXT: xxmrghd vs3, v3, v2
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-NEXT: mfvsrwz r4, f2
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v3, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: vmrghh v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r4, f1
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v4, r4
+; CHECK-BE-NEXT: mfvsrwz r4, f0
+; CHECK-BE-NEXT: sldi r5, r5, 48
+; CHECK-BE-NEXT: mtvsrd v1, r5
+; CHECK-BE-NEXT: vmrghh v0, v0, v1
+; CHECK-BE-NEXT: vmrghw v2, v2, v0
+; CHECK-BE-NEXT: stxv vs3, 0(r3)
+; CHECK-BE-NEXT: sldi r4, r4, 48
+; CHECK-BE-NEXT: mtvsrd v5, r4
+; CHECK-BE-NEXT: vmrghh v4, v4, v5
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT: xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xscvdpuxws f0, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xscvdpuxws f1, v2
+; CHECK-P9-NEXT: mtvsrws v3, r3
; CHECK-P9-NEXT: xscvdpuxws f0, f0
-; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrws v2, r3
-; CHECK-P9-NEXT: mfvsrwz r4, f0
-; CHECK-P9-NEXT: mtvsrws v3, r4
-; CHECK-P9-NEXT: vmrglw v2, v2, v3
+; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpuxws f0, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: xscvdpuxws f1, v2
+; CHECK-BE-NEXT: mtvsrws v3, r3
; CHECK-BE-NEXT: xscvdpuxws f0, f0
-; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: mtvsrws v2, r3
-; CHECK-BE-NEXT: mfvsrwz r4, f0
-; CHECK-BE-NEXT: mtvsrws v3, r4
-; CHECK-BE-NEXT: vmrghw v2, v2, v3
+; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 32(r4)
-; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: lxv vs3, 16(r4)
; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT: lxv vs0, 32(r4)
+; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: xvcvdpuxws v2, vs4
; CHECK-P9-NEXT: xvcvdpuxws v3, vs2
-; CHECK-P9-NEXT: xvcvdpuxws v4, vs3
-; CHECK-P9-NEXT: xvcvdpuxws v5, vs0
+; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT: xvcvdpuxws v4, vs0
; CHECK-P9-NEXT: vmrgew v2, v3, v2
-; CHECK-P9-NEXT: vmrgew v3, v5, v4
-; CHECK-P9-NEXT: stxv v3, 16(r3)
+; CHECK-P9-NEXT: xvcvdpuxws v3, vs2
; CHECK-P9-NEXT: stxv v2, 0(r3)
+; CHECK-P9-NEXT: vmrgew v3, v4, v3
+; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 48(r4)
-; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: lxv vs0, 48(r4)
+; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: xvcvdpuxws v2, vs4
; CHECK-BE-NEXT: xvcvdpuxws v3, vs2
-; CHECK-BE-NEXT: xvcvdpuxws v4, vs3
-; CHECK-BE-NEXT: xvcvdpuxws v5, vs0
+; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: xvcvdpuxws v4, vs0
; CHECK-BE-NEXT: vmrgew v2, v3, v2
-; CHECK-BE-NEXT: vmrgew v3, v5, v4
-; CHECK-BE-NEXT: stxv v3, 16(r3)
+; CHECK-BE-NEXT: xvcvdpuxws v3, vs2
; CHECK-BE-NEXT: stxv v2, 0(r3)
+; CHECK-BE-NEXT: vmrgew v3, v4, v3
+; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x double>, <8 x double>* %0, align 64
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 32(r4)
-; CHECK-P9-NEXT: lxv vs1, 48(r4)
-; CHECK-P9-NEXT: lxv vs2, 0(r4)
-; CHECK-P9-NEXT: lxv vs3, 16(r4)
-; CHECK-P9-NEXT: lxv vs4, 96(r4)
-; CHECK-P9-NEXT: lxv vs5, 112(r4)
-; CHECK-P9-NEXT: lxv vs6, 64(r4)
-; CHECK-P9-NEXT: lxv vs7, 80(r4)
-; CHECK-P9-NEXT: xxmrgld vs8, vs3, vs2
-; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT: xxmrgld vs1, vs7, vs6
+; CHECK-P9-NEXT: lxv vs6, 0(r4)
+; CHECK-P9-NEXT: lxv vs7, 16(r4)
+; CHECK-P9-NEXT: xxmrgld vs8, vs7, vs6
; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT: lxv vs4, 32(r4)
+; CHECK-P9-NEXT: lxv vs5, 48(r4)
; CHECK-P9-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xvcvdpuxws v2, vs8
-; CHECK-P9-NEXT: xvcvdpuxws v3, vs2
-; CHECK-P9-NEXT: xvcvdpuxws v4, vs3
-; CHECK-P9-NEXT: xvcvdpuxws v5, vs0
-; CHECK-P9-NEXT: xvcvdpuxws v0, vs1
-; CHECK-P9-NEXT: xvcvdpuxws v1, vs6
-; CHECK-P9-NEXT: xvcvdpuxws v6, vs7
-; CHECK-P9-NEXT: xvcvdpuxws v7, vs4
+; CHECK-P9-NEXT: xvcvdpuxws v3, vs6
+; CHECK-P9-NEXT: lxv vs2, 64(r4)
+; CHECK-P9-NEXT: lxv vs3, 80(r4)
+; CHECK-P9-NEXT: xvcvdpuxws v4, vs7
; CHECK-P9-NEXT: vmrgew v2, v3, v2
-; CHECK-P9-NEXT: vmrgew v3, v5, v4
-; CHECK-P9-NEXT: vmrgew v4, v1, v0
-; CHECK-P9-NEXT: vmrgew v5, v7, v6
-; CHECK-P9-NEXT: stxv v3, 16(r3)
+; CHECK-P9-NEXT: xvcvdpuxws v3, vs4
+; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2
+; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT: lxv vs0, 96(r4)
+; CHECK-P9-NEXT: lxv vs1, 112(r4)
; CHECK-P9-NEXT: stxv v2, 0(r3)
-; CHECK-P9-NEXT: stxv v5, 48(r3)
+; CHECK-P9-NEXT: xvcvdpuxws v5, vs2
+; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT: xvcvdpuxws v0, vs0
+; CHECK-P9-NEXT: vmrgew v3, v3, v4
+; CHECK-P9-NEXT: xvcvdpuxws v4, vs4
+; CHECK-P9-NEXT: stxv v3, 16(r3)
+; CHECK-P9-NEXT: vmrgew v4, v5, v4
; CHECK-P9-NEXT: stxv v4, 32(r3)
+; CHECK-P9-NEXT: xvcvdpuxws v5, vs2
+; CHECK-P9-NEXT: vmrgew v5, v0, v5
+; CHECK-P9-NEXT: stxv v5, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 48(r4)
-; CHECK-BE-NEXT: lxv vs1, 32(r4)
-; CHECK-BE-NEXT: lxv vs2, 16(r4)
-; CHECK-BE-NEXT: lxv vs3, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 112(r4)
-; CHECK-BE-NEXT: lxv vs5, 96(r4)
-; CHECK-BE-NEXT: lxv vs6, 80(r4)
-; CHECK-BE-NEXT: lxv vs7, 64(r4)
-; CHECK-BE-NEXT: xxmrgld vs8, vs3, vs2
-; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT: xxmrgld vs1, vs7, vs6
+; CHECK-BE-NEXT: lxv vs6, 16(r4)
+; CHECK-BE-NEXT: lxv vs7, 0(r4)
+; CHECK-BE-NEXT: xxmrgld vs8, vs7, vs6
; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
; CHECK-BE-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-BE-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-BE-NEXT: xvcvdpuxws v2, vs8
-; CHECK-BE-NEXT: xvcvdpuxws v3, vs2
-; CHECK-BE-NEXT: xvcvdpuxws v4, vs3
-; CHECK-BE-NEXT: xvcvdpuxws v5, vs0
-; CHECK-BE-NEXT: xvcvdpuxws v0, vs1
-; CHECK-BE-NEXT: xvcvdpuxws v1, vs6
-; CHECK-BE-NEXT: xvcvdpuxws v6, vs7
-; CHECK-BE-NEXT: xvcvdpuxws v7, vs4
+; CHECK-BE-NEXT: xvcvdpuxws v3, vs6
+; CHECK-BE-NEXT: lxv vs2, 80(r4)
+; CHECK-BE-NEXT: lxv vs3, 64(r4)
+; CHECK-BE-NEXT: xvcvdpuxws v4, vs7
; CHECK-BE-NEXT: vmrgew v2, v3, v2
-; CHECK-BE-NEXT: vmrgew v3, v5, v4
-; CHECK-BE-NEXT: vmrgew v4, v1, v0
-; CHECK-BE-NEXT: vmrgew v5, v7, v6
-; CHECK-BE-NEXT: stxv v3, 16(r3)
+; CHECK-BE-NEXT: xvcvdpuxws v3, vs4
+; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2
+; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT: lxv vs0, 112(r4)
+; CHECK-BE-NEXT: lxv vs1, 96(r4)
; CHECK-BE-NEXT: stxv v2, 0(r3)
-; CHECK-BE-NEXT: stxv v5, 48(r3)
+; CHECK-BE-NEXT: xvcvdpuxws v5, vs2
+; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: xvcvdpuxws v0, vs0
+; CHECK-BE-NEXT: vmrgew v3, v3, v4
+; CHECK-BE-NEXT: xvcvdpuxws v4, vs4
+; CHECK-BE-NEXT: stxv v3, 16(r3)
+; CHECK-BE-NEXT: vmrgew v4, v5, v4
; CHECK-BE-NEXT: stxv v4, 32(r3)
+; CHECK-BE-NEXT: xvcvdpuxws v5, vs2
+; CHECK-BE-NEXT: vmrgew v5, v0, v5
+; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xscvdpsxws f0, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xscvdpsxws f1, v2
+; CHECK-P9-NEXT: mtvsrws v3, r3
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrws v2, r3
-; CHECK-P9-NEXT: mfvsrwz r4, f0
-; CHECK-P9-NEXT: mtvsrws v3, r4
-; CHECK-P9-NEXT: vmrglw v2, v2, v3
+; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpsxws f0, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: xscvdpsxws f1, v2
+; CHECK-BE-NEXT: mtvsrws v3, r3
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: mtvsrws v2, r3
-; CHECK-BE-NEXT: mfvsrwz r4, f0
-; CHECK-BE-NEXT: mtvsrws v3, r4
-; CHECK-BE-NEXT: vmrghw v2, v2, v3
+; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 32(r4)
-; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: lxv vs3, 16(r4)
; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT: lxv vs0, 32(r4)
+; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: xvcvdpsxws v2, vs4
; CHECK-P9-NEXT: xvcvdpsxws v3, vs2
-; CHECK-P9-NEXT: xvcvdpsxws v4, vs3
-; CHECK-P9-NEXT: xvcvdpsxws v5, vs0
+; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT: xvcvdpsxws v4, vs0
; CHECK-P9-NEXT: vmrgew v2, v3, v2
-; CHECK-P9-NEXT: vmrgew v3, v5, v4
-; CHECK-P9-NEXT: stxv v3, 16(r3)
+; CHECK-P9-NEXT: xvcvdpsxws v3, vs2
; CHECK-P9-NEXT: stxv v2, 0(r3)
+; CHECK-P9-NEXT: vmrgew v3, v4, v3
+; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 48(r4)
-; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: lxv vs0, 48(r4)
+; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: xvcvdpsxws v2, vs4
; CHECK-BE-NEXT: xvcvdpsxws v3, vs2
-; CHECK-BE-NEXT: xvcvdpsxws v4, vs3
-; CHECK-BE-NEXT: xvcvdpsxws v5, vs0
+; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: xvcvdpsxws v4, vs0
; CHECK-BE-NEXT: vmrgew v2, v3, v2
-; CHECK-BE-NEXT: vmrgew v3, v5, v4
-; CHECK-BE-NEXT: stxv v3, 16(r3)
+; CHECK-BE-NEXT: xvcvdpsxws v3, vs2
; CHECK-BE-NEXT: stxv v2, 0(r3)
+; CHECK-BE-NEXT: vmrgew v3, v4, v3
+; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x double>, <8 x double>* %0, align 64
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 32(r4)
-; CHECK-P9-NEXT: lxv vs1, 48(r4)
-; CHECK-P9-NEXT: lxv vs2, 0(r4)
-; CHECK-P9-NEXT: lxv vs3, 16(r4)
-; CHECK-P9-NEXT: lxv vs4, 96(r4)
-; CHECK-P9-NEXT: lxv vs5, 112(r4)
-; CHECK-P9-NEXT: lxv vs6, 64(r4)
-; CHECK-P9-NEXT: lxv vs7, 80(r4)
-; CHECK-P9-NEXT: xxmrgld vs8, vs3, vs2
-; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT: xxmrgld vs1, vs7, vs6
+; CHECK-P9-NEXT: lxv vs6, 0(r4)
+; CHECK-P9-NEXT: lxv vs7, 16(r4)
+; CHECK-P9-NEXT: xxmrgld vs8, vs7, vs6
; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT: lxv vs4, 32(r4)
+; CHECK-P9-NEXT: lxv vs5, 48(r4)
; CHECK-P9-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xvcvdpsxws v2, vs8
-; CHECK-P9-NEXT: xvcvdpsxws v3, vs2
-; CHECK-P9-NEXT: xvcvdpsxws v4, vs3
-; CHECK-P9-NEXT: xvcvdpsxws v5, vs0
-; CHECK-P9-NEXT: xvcvdpsxws v0, vs1
-; CHECK-P9-NEXT: xvcvdpsxws v1, vs6
-; CHECK-P9-NEXT: xvcvdpsxws v6, vs7
-; CHECK-P9-NEXT: xvcvdpsxws v7, vs4
+; CHECK-P9-NEXT: xvcvdpsxws v3, vs6
+; CHECK-P9-NEXT: lxv vs2, 64(r4)
+; CHECK-P9-NEXT: lxv vs3, 80(r4)
+; CHECK-P9-NEXT: xvcvdpsxws v4, vs7
; CHECK-P9-NEXT: vmrgew v2, v3, v2
-; CHECK-P9-NEXT: vmrgew v3, v5, v4
-; CHECK-P9-NEXT: vmrgew v4, v1, v0
-; CHECK-P9-NEXT: vmrgew v5, v7, v6
-; CHECK-P9-NEXT: stxv v3, 16(r3)
+; CHECK-P9-NEXT: xvcvdpsxws v3, vs4
+; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2
+; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT: lxv vs0, 96(r4)
+; CHECK-P9-NEXT: lxv vs1, 112(r4)
; CHECK-P9-NEXT: stxv v2, 0(r3)
-; CHECK-P9-NEXT: stxv v5, 48(r3)
+; CHECK-P9-NEXT: xvcvdpsxws v5, vs2
+; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT: xvcvdpsxws v0, vs0
+; CHECK-P9-NEXT: vmrgew v3, v3, v4
+; CHECK-P9-NEXT: xvcvdpsxws v4, vs4
+; CHECK-P9-NEXT: stxv v3, 16(r3)
+; CHECK-P9-NEXT: vmrgew v4, v5, v4
; CHECK-P9-NEXT: stxv v4, 32(r3)
+; CHECK-P9-NEXT: xvcvdpsxws v5, vs2
+; CHECK-P9-NEXT: vmrgew v5, v0, v5
+; CHECK-P9-NEXT: stxv v5, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 48(r4)
-; CHECK-BE-NEXT: lxv vs1, 32(r4)
-; CHECK-BE-NEXT: lxv vs2, 16(r4)
-; CHECK-BE-NEXT: lxv vs3, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 112(r4)
-; CHECK-BE-NEXT: lxv vs5, 96(r4)
-; CHECK-BE-NEXT: lxv vs6, 80(r4)
-; CHECK-BE-NEXT: lxv vs7, 64(r4)
-; CHECK-BE-NEXT: xxmrgld vs8, vs3, vs2
-; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT: xxmrgld vs1, vs7, vs6
+; CHECK-BE-NEXT: lxv vs6, 16(r4)
+; CHECK-BE-NEXT: lxv vs7, 0(r4)
+; CHECK-BE-NEXT: xxmrgld vs8, vs7, vs6
; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
; CHECK-BE-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-BE-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-BE-NEXT: xvcvdpsxws v2, vs8
-; CHECK-BE-NEXT: xvcvdpsxws v3, vs2
-; CHECK-BE-NEXT: xvcvdpsxws v4, vs3
-; CHECK-BE-NEXT: xvcvdpsxws v5, vs0
-; CHECK-BE-NEXT: xvcvdpsxws v0, vs1
-; CHECK-BE-NEXT: xvcvdpsxws v1, vs6
-; CHECK-BE-NEXT: xvcvdpsxws v6, vs7
-; CHECK-BE-NEXT: xvcvdpsxws v7, vs4
+; CHECK-BE-NEXT: xvcvdpsxws v3, vs6
+; CHECK-BE-NEXT: lxv vs2, 80(r4)
+; CHECK-BE-NEXT: lxv vs3, 64(r4)
+; CHECK-BE-NEXT: xvcvdpsxws v4, vs7
; CHECK-BE-NEXT: vmrgew v2, v3, v2
-; CHECK-BE-NEXT: vmrgew v3, v5, v4
-; CHECK-BE-NEXT: vmrgew v4, v1, v0
-; CHECK-BE-NEXT: vmrgew v5, v7, v6
-; CHECK-BE-NEXT: stxv v3, 16(r3)
+; CHECK-BE-NEXT: xvcvdpsxws v3, vs4
+; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2
+; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT: lxv vs0, 112(r4)
+; CHECK-BE-NEXT: lxv vs1, 96(r4)
; CHECK-BE-NEXT: stxv v2, 0(r3)
-; CHECK-BE-NEXT: stxv v5, 48(r3)
+; CHECK-BE-NEXT: xvcvdpsxws v5, vs2
+; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT: xvcvdpsxws v0, vs0
+; CHECK-BE-NEXT: vmrgew v3, v3, v4
+; CHECK-BE-NEXT: xvcvdpsxws v4, vs4
+; CHECK-BE-NEXT: stxv v3, 16(r3)
+; CHECK-BE-NEXT: vmrgew v4, v5, v4
; CHECK-BE-NEXT: stxv v4, 32(r3)
+; CHECK-BE-NEXT: xvcvdpsxws v5, vs2
+; CHECK-BE-NEXT: vmrgew v5, v0, v5
+; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xscvdpsxws f0, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xscvdpsxws f1, v2
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: addi r3, r1, -2
-; CHECK-P9-NEXT: mtvsrd f1, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: vmrglb v2, v2, v3
+; CHECK-P9-NEXT: vmrglb v2, v3, v2
; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8
; CHECK-P9-NEXT: stxsihx v2, 0, r3
; CHECK-P9-NEXT: lhz r3, -2(r1)
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpsxws f0, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: xscvdpsxws f1, v2
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f0
; CHECK-BE-NEXT: mtvsrd v2, r3
; CHECK-BE-NEXT: addi r3, r1, -2
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-NEXT: vmrghb v2, v3, v2
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: stxsihx v2, 0, r3
; CHECK-BE-NEXT: lhz r3, -2(r1)
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs2, vs1
-; CHECK-P9-NEXT: xxswapd vs3, vs0
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r5, f0
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: mfvsrwz r4, f2
-; CHECK-P9-NEXT: mfvsrwz r6, f3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
; CHECK-P9-NEXT: vmrglb v2, v2, v3
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: xxswapd vs2, vs1
-; CHECK-BE-NEXT: xxswapd vs3, vs0
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: vmrghb v2, v2, v3
; CHECK-BE-NEXT: mfvsrwz r3, f1
-; CHECK-BE-NEXT: mfvsrwz r5, f0
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f2
-; CHECK-BE-NEXT: mfvsrwz r6, f3
-; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: vmrghb v2, v2, v3
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lxv vs3, 0(r3)
+; CHECK-P9-NEXT: xscvdpsxws f4, f3
+; CHECK-P9-NEXT: xxswapd vs3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: lxv vs0, 48(r3)
; CHECK-P9-NEXT: lxv vs1, 32(r3)
; CHECK-P9-NEXT: lxv vs2, 16(r3)
-; CHECK-P9-NEXT: lxv vs3, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs4, vs3
-; CHECK-P9-NEXT: xxswapd vs5, vs2
-; CHECK-P9-NEXT: xxswapd vs6, vs1
-; CHECK-P9-NEXT: xxswapd vs7, vs0
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: xxswapd v2, vs4
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: mfvsrwz r3, f3
-; CHECK-P9-NEXT: mfvsrwz r5, f2
-; CHECK-P9-NEXT: mfvsrwz r7, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mfvsrwz r4, f4
-; CHECK-P9-NEXT: mfvsrwz r6, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
; CHECK-P9-NEXT: vmrglb v2, v2, v3
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
-; CHECK-P9-NEXT: vmrglb v4, v0, v1
-; CHECK-P9-NEXT: vmrglb v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v5, v4
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
-; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
-; CHECK-BE-NEXT: xxswapd vs4, vs3
-; CHECK-BE-NEXT: xxswapd vs5, vs2
-; CHECK-BE-NEXT: xxswapd vs6, vs1
-; CHECK-BE-NEXT: xxswapd vs7, vs0
+; CHECK-BE-NEXT: xscvdpsxws f4, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r3, f3
-; CHECK-BE-NEXT: mfvsrwz r5, f2
-; CHECK-BE-NEXT: mfvsrwz r7, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: lxv vs1, 16(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f4
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f4
-; CHECK-BE-NEXT: mfvsrwz r6, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: vmrghb v2, v2, v3
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
-; CHECK-BE-NEXT: vmrghb v4, v0, v1
-; CHECK-BE-NEXT: vmrghb v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 48(r3)
-; CHECK-P9-NEXT: lxv vs3, 32(r3)
-; CHECK-P9-NEXT: lxv vs4, 16(r3)
-; CHECK-P9-NEXT: lxv vs5, 0(r3)
+; CHECK-P9-NEXT: lxv vs7, 0(r3)
+; CHECK-P9-NEXT: xscvdpsxws f8, f7
+; CHECK-P9-NEXT: xxswapd vs7, vs7
+; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: lxv vs0, 112(r3)
; CHECK-P9-NEXT: lxv vs1, 96(r3)
-; CHECK-P9-NEXT: lxv vs6, 80(r3)
-; CHECK-P9-NEXT: lxv vs7, 64(r3)
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxswapd vs8, vs5
-; CHECK-P9-NEXT: xxswapd vs9, vs4
-; CHECK-P9-NEXT: xxswapd vs10, vs3
-; CHECK-P9-NEXT: xxswapd vs11, vs2
-; CHECK-P9-NEXT: xxswapd vs12, vs7
-; CHECK-P9-NEXT: xxswapd vs13, vs6
-; CHECK-P9-NEXT: xxswapd v2, vs1
-; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: lxv vs2, 80(r3)
+; CHECK-P9-NEXT: lxv vs3, 64(r3)
+; CHECK-P9-NEXT: lxv vs4, 48(r3)
+; CHECK-P9-NEXT: lxv vs5, 32(r3)
+; CHECK-P9-NEXT: lxv vs6, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f8
+; CHECK-P9-NEXT: mtvsrd f8, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f7
+; CHECK-P9-NEXT: xxswapd v2, vs8
+; CHECK-P9-NEXT: mtvsrd f7, r3
+; CHECK-P9-NEXT: xxswapd v3, vs7
+; CHECK-P9-NEXT: xscvdpsxws f7, f6
+; CHECK-P9-NEXT: xxswapd vs6, vs6
+; CHECK-P9-NEXT: xscvdpsxws f6, f6
+; CHECK-P9-NEXT: mfvsrwz r3, f7
+; CHECK-P9-NEXT: mtvsrd f7, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f6
+; CHECK-P9-NEXT: mtvsrd f6, r3
+; CHECK-P9-NEXT: xxswapd v4, vs6
+; CHECK-P9-NEXT: xscvdpsxws f6, f5
+; CHECK-P9-NEXT: xxswapd vs5, vs5
; CHECK-P9-NEXT: xscvdpsxws f5, f5
+; CHECK-P9-NEXT: mfvsrwz r3, f6
+; CHECK-P9-NEXT: mtvsrd f6, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f5
+; CHECK-P9-NEXT: vmrglb v2, v2, v3
+; CHECK-P9-NEXT: xxswapd v3, vs7
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs6
+; CHECK-P9-NEXT: mtvsrd f5, r3
+; CHECK-P9-NEXT: xxswapd v4, vs5
+; CHECK-P9-NEXT: xscvdpsxws f5, f4
+; CHECK-P9-NEXT: xxswapd vs4, vs4
; CHECK-P9-NEXT: xscvdpsxws f4, f4
+; CHECK-P9-NEXT: mfvsrwz r3, f5
+; CHECK-P9-NEXT: mtvsrd f5, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: xxswapd v5, vs4
+; CHECK-P9-NEXT: xscvdpsxws f4, f3
+; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs5
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: xxswapd v4, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v5, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs4
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs3
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v5, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
-; CHECK-P9-NEXT: xscvdpsxws f9, f9
-; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r3, f5
-; CHECK-P9-NEXT: mfvsrwz r4, f4
-; CHECK-P9-NEXT: mfvsrwz r5, f3
-; CHECK-P9-NEXT: mfvsrwz r6, f2
-; CHECK-P9-NEXT: mfvsrwz r11, f7
-; CHECK-P9-NEXT: mfvsrwz r12, f6
-; CHECK-P9-NEXT: mfvsrwz r0, f1
-; CHECK-P9-NEXT: mfvsrwz r30, f0
-; CHECK-P9-NEXT: mfvsrwz r7, f8
-; CHECK-P9-NEXT: mfvsrwz r8, f9
-; CHECK-P9-NEXT: mfvsrwz r9, f10
-; CHECK-P9-NEXT: mfvsrwz r10, f11
-; CHECK-P9-NEXT: mfvsrwz r29, f12
-; CHECK-P9-NEXT: mfvsrwz r28, f13
-; CHECK-P9-NEXT: mfvsrwz r27, v2
-; CHECK-P9-NEXT: mfvsrwz r26, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f8, r11
-; CHECK-P9-NEXT: mtvsrd f9, r12
-; CHECK-P9-NEXT: mtvsrd f10, r0
-; CHECK-P9-NEXT: mtvsrd f11, r30
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: mtvsrd f12, r29
-; CHECK-P9-NEXT: mtvsrd f13, r28
-; CHECK-P9-NEXT: mtvsrd v2, r27
-; CHECK-P9-NEXT: mtvsrd v3, r26
-; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: xxswapd v5, vs1
-; CHECK-P9-NEXT: xxswapd v0, vs2
-; CHECK-P9-NEXT: xxswapd v1, vs3
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v6, vs4
-; CHECK-P9-NEXT: xxswapd v7, vs5
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v8, vs6
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs8
-; CHECK-P9-NEXT: xxswapd v11, vs12
-; CHECK-P9-NEXT: xxswapd v12, vs9
-; CHECK-P9-NEXT: xxswapd v13, vs13
-; CHECK-P9-NEXT: xxswapd v14, vs10
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs11
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglb v4, v4, v6
-; CHECK-P9-NEXT: vmrglb v5, v5, v7
-; CHECK-P9-NEXT: vmrglb v0, v0, v8
-; CHECK-P9-NEXT: vmrglb v1, v1, v9
-; CHECK-P9-NEXT: vmrglb v6, v10, v11
-; CHECK-P9-NEXT: vmrglb v7, v12, v13
-; CHECK-P9-NEXT: vmrglb v2, v14, v2
-; CHECK-P9-NEXT: vmrglb v3, v15, v3
+; CHECK-P9-NEXT: xxswapd v0, vs0
+; CHECK-P9-NEXT: vmrglb v5, v5, v0
; CHECK-P9-NEXT: vmrglh v4, v5, v4
-; CHECK-P9-NEXT: vmrglh v5, v1, v0
-; CHECK-P9-NEXT: vmrglh v0, v7, v6
-; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
-; CHECK-P9-NEXT: vmrglw v2, v2, v0
-; CHECK-P9-NEXT: xxmrgld v2, v2, v3
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 64(r3)
-; CHECK-BE-NEXT: lxv vs3, 80(r3)
-; CHECK-BE-NEXT: lxv vs4, 96(r3)
-; CHECK-BE-NEXT: lxv vs5, 112(r3)
+; CHECK-BE-NEXT: lxv vs7, 112(r3)
+; CHECK-BE-NEXT: xscvdpsxws f8, f7
+; CHECK-BE-NEXT: xxswapd vs7, vs7
+; CHECK-BE-NEXT: xscvdpsxws f7, f7
+; CHECK-BE-NEXT: lxv vs6, 96(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: lxv vs6, 32(r3)
-; CHECK-BE-NEXT: lxv vs7, 48(r3)
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxswapd vs8, vs5
-; CHECK-BE-NEXT: xxswapd vs9, vs4
-; CHECK-BE-NEXT: xxswapd vs10, vs3
-; CHECK-BE-NEXT: xxswapd vs11, vs2
-; CHECK-BE-NEXT: xxswapd vs12, vs7
-; CHECK-BE-NEXT: xxswapd vs13, vs6
-; CHECK-BE-NEXT: xxswapd v2, vs1
-; CHECK-BE-NEXT: xxswapd v3, vs0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: lxv vs3, 48(r3)
+; CHECK-BE-NEXT: lxv vs4, 64(r3)
+; CHECK-BE-NEXT: lxv vs5, 80(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f8
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f7
+; CHECK-BE-NEXT: xscvdpsxws f7, f6
+; CHECK-BE-NEXT: xxswapd vs6, vs6
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f6, f6
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f7
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f6
+; CHECK-BE-NEXT: xscvdpsxws f6, f5
+; CHECK-BE-NEXT: xxswapd vs5, vs5
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f5, f5
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f6
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f5
+; CHECK-BE-NEXT: xscvdpsxws f5, f4
+; CHECK-BE-NEXT: xxswapd vs4, vs4
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f5
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: xscvdpsxws f4, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
-; CHECK-BE-NEXT: mfvsrwz r3, f5
-; CHECK-BE-NEXT: mfvsrwz r4, f4
-; CHECK-BE-NEXT: mfvsrwz r5, f3
-; CHECK-BE-NEXT: mfvsrwz r6, f2
-; CHECK-BE-NEXT: mfvsrwz r11, f7
-; CHECK-BE-NEXT: mfvsrwz r12, f6
-; CHECK-BE-NEXT: mfvsrwz r0, f1
-; CHECK-BE-NEXT: mfvsrwz r30, f0
-; CHECK-BE-NEXT: mfvsrwz r7, f8
-; CHECK-BE-NEXT: mfvsrwz r8, f9
-; CHECK-BE-NEXT: mfvsrwz r9, f10
-; CHECK-BE-NEXT: mfvsrwz r10, f11
-; CHECK-BE-NEXT: mfvsrwz r29, f12
-; CHECK-BE-NEXT: mfvsrwz r28, f13
-; CHECK-BE-NEXT: mfvsrwz r27, v2
-; CHECK-BE-NEXT: mfvsrwz r26, v3
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r11, r11, 56
-; CHECK-BE-NEXT: sldi r12, r12, 56
-; CHECK-BE-NEXT: sldi r0, r0, 56
-; CHECK-BE-NEXT: sldi r30, r30, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
-; CHECK-BE-NEXT: sldi r29, r29, 56
-; CHECK-BE-NEXT: sldi r28, r28, 56
-; CHECK-BE-NEXT: sldi r27, r27, 56
-; CHECK-BE-NEXT: sldi r26, r26, 56
-; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v8, r11
-; CHECK-BE-NEXT: mtvsrd v10, r12
-; CHECK-BE-NEXT: mtvsrd v12, r0
-; CHECK-BE-NEXT: mtvsrd v14, r30
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: mtvsrd v7, r10
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v9, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r28
-; CHECK-BE-NEXT: mtvsrd v13, r27
-; CHECK-BE-NEXT: mtvsrd v15, r26
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghb v2, v2, v0
-; CHECK-BE-NEXT: vmrghb v3, v3, v1
-; CHECK-BE-NEXT: vmrghb v4, v4, v6
-; CHECK-BE-NEXT: vmrghb v5, v5, v7
-; CHECK-BE-NEXT: vmrghb v0, v8, v9
-; CHECK-BE-NEXT: vmrghb v1, v10, v11
-; CHECK-BE-NEXT: vmrghb v6, v12, v13
-; CHECK-BE-NEXT: vmrghb v7, v14, v15
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
-; CHECK-BE-NEXT: vmrghh v4, v1, v0
-; CHECK-BE-NEXT: vmrghh v5, v7, v6
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v0, r3
+; CHECK-BE-NEXT: vmrghb v5, v5, v0
+; CHECK-BE-NEXT: vmrghh v4, v5, v4
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xscvdpsxws f0, v2
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: xxswapd v3, vs0
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xscvdpsxws f1, v2
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r4, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: addi r3, r1, -2
-; CHECK-P9-NEXT: mtvsrd f1, r4
; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: vmrglb v2, v2, v3
+; CHECK-P9-NEXT: vmrglb v2, v3, v2
; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8
; CHECK-P9-NEXT: stxsihx v2, 0, r3
; CHECK-P9-NEXT: lhz r3, -2(r1)
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpsxws f0, v2
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: xxswapd vs0, v2
-; CHECK-BE-NEXT: xscvdpsxws f1, v2
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f0
; CHECK-BE-NEXT: mtvsrd v2, r3
; CHECK-BE-NEXT: addi r3, r1, -2
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-NEXT: vmrghb v2, v3, v2
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: stxsihx v2, 0, r3
; CHECK-BE-NEXT: lhz r3, -2(r1)
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs2, vs1
-; CHECK-P9-NEXT: xxswapd vs3, vs0
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: lxv vs0, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: xxswapd v2, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: mfvsrwz r3, f1
-; CHECK-P9-NEXT: mfvsrwz r5, f0
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: mfvsrwz r4, f2
-; CHECK-P9-NEXT: mfvsrwz r6, f3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: xxswapd v2, vs0
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
; CHECK-P9-NEXT: vmrglb v2, v2, v3
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
+; CHECK-P9-NEXT: xxswapd v3, vs1
+; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: li r3, 0
; CHECK-P9-NEXT: vextuwrx r3, r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: xxswapd vs2, vs1
-; CHECK-BE-NEXT: xxswapd vs3, vs0
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: vmrghb v2, v2, v3
; CHECK-BE-NEXT: mfvsrwz r3, f1
-; CHECK-BE-NEXT: mfvsrwz r5, f0
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f2
-; CHECK-BE-NEXT: mfvsrwz r6, f3
-; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
; CHECK-BE-NEXT: li r3, 0
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: vmrghb v2, v2, v3
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
; CHECK-BE-NEXT: vmrghh v2, v3, v2
; CHECK-BE-NEXT: vextuwlx r3, r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lxv vs3, 0(r3)
+; CHECK-P9-NEXT: xscvdpsxws f4, f3
+; CHECK-P9-NEXT: xxswapd vs3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f3
; CHECK-P9-NEXT: lxv vs0, 48(r3)
; CHECK-P9-NEXT: lxv vs1, 32(r3)
; CHECK-P9-NEXT: lxv vs2, 16(r3)
-; CHECK-P9-NEXT: lxv vs3, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs4, vs3
-; CHECK-P9-NEXT: xxswapd vs5, vs2
-; CHECK-P9-NEXT: xxswapd vs6, vs1
-; CHECK-P9-NEXT: xxswapd vs7, vs0
-; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: xxswapd v2, vs4
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f1, f1
-; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f4, f4
-; CHECK-P9-NEXT: xscvdpsxws f5, f5
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: mfvsrwz r3, f3
-; CHECK-P9-NEXT: mfvsrwz r5, f2
-; CHECK-P9-NEXT: mfvsrwz r7, f1
-; CHECK-P9-NEXT: mfvsrwz r9, f0
-; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mfvsrwz r4, f4
-; CHECK-P9-NEXT: mfvsrwz r6, f5
-; CHECK-P9-NEXT: mfvsrwz r8, f6
-; CHECK-P9-NEXT: mfvsrwz r10, f7
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
; CHECK-P9-NEXT: xxswapd v4, vs2
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: xxswapd v0, vs4
-; CHECK-P9-NEXT: xxswapd v6, vs6
-; CHECK-P9-NEXT: xxswapd v3, vs1
-; CHECK-P9-NEXT: xxswapd v5, vs3
-; CHECK-P9-NEXT: xxswapd v1, vs5
-; CHECK-P9-NEXT: xxswapd v7, vs7
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
; CHECK-P9-NEXT: vmrglb v2, v2, v3
-; CHECK-P9-NEXT: vmrglb v3, v4, v5
-; CHECK-P9-NEXT: vmrglb v4, v0, v1
-; CHECK-P9-NEXT: vmrglb v5, v6, v7
+; CHECK-P9-NEXT: xxswapd v3, vs3
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglh v3, v5, v4
+; CHECK-P9-NEXT: xxswapd v3, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
+; CHECK-P9-NEXT: xscvdpsxws f0, f0
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
+; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs1
+; CHECK-P9-NEXT: xxswapd v5, vs0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 0(r3)
-; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: lxv vs2, 32(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
-; CHECK-BE-NEXT: xxswapd vs4, vs3
-; CHECK-BE-NEXT: xxswapd vs5, vs2
-; CHECK-BE-NEXT: xxswapd vs6, vs1
-; CHECK-BE-NEXT: xxswapd vs7, vs0
+; CHECK-BE-NEXT: xscvdpsxws f4, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
; CHECK-BE-NEXT: xscvdpsxws f3, f3
-; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f1, f1
-; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f4, f4
-; CHECK-BE-NEXT: xscvdpsxws f5, f5
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: mfvsrwz r3, f3
-; CHECK-BE-NEXT: mfvsrwz r5, f2
-; CHECK-BE-NEXT: mfvsrwz r7, f1
-; CHECK-BE-NEXT: mfvsrwz r9, f0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: lxv vs1, 16(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f4
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
-; CHECK-BE-NEXT: mfvsrwz r4, f4
-; CHECK-BE-NEXT: mfvsrwz r6, f5
-; CHECK-BE-NEXT: mfvsrwz r8, f6
-; CHECK-BE-NEXT: mfvsrwz r10, f7
; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v7, r10
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f2, f2
+; CHECK-BE-NEXT: mtvsrd v3, r3
; CHECK-BE-NEXT: vmrghb v2, v2, v3
-; CHECK-BE-NEXT: vmrghb v3, v4, v5
-; CHECK-BE-NEXT: vmrghb v4, v0, v1
-; CHECK-BE-NEXT: vmrghb v5, v6, v7
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f0, f0
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
; CHECK-BE-NEXT: vmrghw v2, v3, v2
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs2, 48(r3)
-; CHECK-P9-NEXT: lxv vs3, 32(r3)
-; CHECK-P9-NEXT: lxv vs4, 16(r3)
-; CHECK-P9-NEXT: lxv vs5, 0(r3)
+; CHECK-P9-NEXT: lxv vs7, 0(r3)
+; CHECK-P9-NEXT: xscvdpsxws f8, f7
+; CHECK-P9-NEXT: xxswapd vs7, vs7
+; CHECK-P9-NEXT: xscvdpsxws f7, f7
; CHECK-P9-NEXT: lxv vs0, 112(r3)
; CHECK-P9-NEXT: lxv vs1, 96(r3)
-; CHECK-P9-NEXT: lxv vs6, 80(r3)
-; CHECK-P9-NEXT: lxv vs7, 64(r3)
-; CHECK-P9-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT: xxswapd vs8, vs5
-; CHECK-P9-NEXT: xxswapd vs9, vs4
-; CHECK-P9-NEXT: xxswapd vs10, vs3
-; CHECK-P9-NEXT: xxswapd vs11, vs2
-; CHECK-P9-NEXT: xxswapd vs12, vs7
-; CHECK-P9-NEXT: xxswapd vs13, vs6
-; CHECK-P9-NEXT: xxswapd v2, vs1
-; CHECK-P9-NEXT: xxswapd v3, vs0
+; CHECK-P9-NEXT: lxv vs2, 80(r3)
+; CHECK-P9-NEXT: lxv vs3, 64(r3)
+; CHECK-P9-NEXT: lxv vs4, 48(r3)
+; CHECK-P9-NEXT: lxv vs5, 32(r3)
+; CHECK-P9-NEXT: lxv vs6, 16(r3)
+; CHECK-P9-NEXT: mfvsrwz r3, f8
+; CHECK-P9-NEXT: mtvsrd f8, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f7
+; CHECK-P9-NEXT: xxswapd v2, vs8
+; CHECK-P9-NEXT: mtvsrd f7, r3
+; CHECK-P9-NEXT: xxswapd v3, vs7
+; CHECK-P9-NEXT: xscvdpsxws f7, f6
+; CHECK-P9-NEXT: xxswapd vs6, vs6
+; CHECK-P9-NEXT: xscvdpsxws f6, f6
+; CHECK-P9-NEXT: mfvsrwz r3, f7
+; CHECK-P9-NEXT: mtvsrd f7, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f6
+; CHECK-P9-NEXT: mtvsrd f6, r3
+; CHECK-P9-NEXT: xxswapd v4, vs6
+; CHECK-P9-NEXT: xscvdpsxws f6, f5
+; CHECK-P9-NEXT: xxswapd vs5, vs5
; CHECK-P9-NEXT: xscvdpsxws f5, f5
+; CHECK-P9-NEXT: mfvsrwz r3, f6
+; CHECK-P9-NEXT: mtvsrd f6, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f5
+; CHECK-P9-NEXT: vmrglb v2, v2, v3
+; CHECK-P9-NEXT: xxswapd v3, vs7
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs6
+; CHECK-P9-NEXT: mtvsrd f5, r3
+; CHECK-P9-NEXT: xxswapd v4, vs5
+; CHECK-P9-NEXT: xscvdpsxws f5, f4
+; CHECK-P9-NEXT: xxswapd vs4, vs4
; CHECK-P9-NEXT: xscvdpsxws f4, f4
+; CHECK-P9-NEXT: mfvsrwz r3, f5
+; CHECK-P9-NEXT: mtvsrd f5, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: xxswapd v5, vs4
+; CHECK-P9-NEXT: xscvdpsxws f4, f3
+; CHECK-P9-NEXT: xxswapd vs3, vs3
; CHECK-P9-NEXT: xscvdpsxws f3, f3
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs5
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f4
+; CHECK-P9-NEXT: mtvsrd f4, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: xxswapd v4, vs3
+; CHECK-P9-NEXT: xscvdpsxws f3, f2
+; CHECK-P9-NEXT: xxswapd vs2, vs2
; CHECK-P9-NEXT: xscvdpsxws f2, f2
-; CHECK-P9-NEXT: xscvdpsxws f7, f7
-; CHECK-P9-NEXT: xscvdpsxws f6, f6
+; CHECK-P9-NEXT: mfvsrwz r3, f3
+; CHECK-P9-NEXT: mtvsrd f3, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: xxswapd v5, vs2
+; CHECK-P9-NEXT: xscvdpsxws f2, f1
+; CHECK-P9-NEXT: xxswapd vs1, vs1
; CHECK-P9-NEXT: xscvdpsxws f1, f1
+; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: xxswapd v3, vs4
+; CHECK-P9-NEXT: vmrglb v3, v3, v4
+; CHECK-P9-NEXT: xxswapd v4, vs3
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
+; CHECK-P9-NEXT: vmrglh v3, v4, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f2
+; CHECK-P9-NEXT: mtvsrd f2, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: xxswapd v4, vs2
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: xxswapd v5, vs1
+; CHECK-P9-NEXT: xscvdpsxws f1, f0
+; CHECK-P9-NEXT: xxswapd vs0, vs0
; CHECK-P9-NEXT: xscvdpsxws f0, f0
-; CHECK-P9-NEXT: xscvdpsxws f8, f8
-; CHECK-P9-NEXT: xscvdpsxws f9, f9
-; CHECK-P9-NEXT: xscvdpsxws f10, f10
-; CHECK-P9-NEXT: xscvdpsxws f11, f11
-; CHECK-P9-NEXT: xscvdpsxws f12, f12
-; CHECK-P9-NEXT: xscvdpsxws f13, f13
-; CHECK-P9-NEXT: xscvdpsxws v2, v2
-; CHECK-P9-NEXT: xscvdpsxws v3, v3
-; CHECK-P9-NEXT: mfvsrwz r3, f5
-; CHECK-P9-NEXT: mfvsrwz r4, f4
-; CHECK-P9-NEXT: mfvsrwz r5, f3
-; CHECK-P9-NEXT: mfvsrwz r6, f2
-; CHECK-P9-NEXT: mfvsrwz r11, f7
-; CHECK-P9-NEXT: mfvsrwz r12, f6
-; CHECK-P9-NEXT: mfvsrwz r0, f1
-; CHECK-P9-NEXT: mfvsrwz r30, f0
-; CHECK-P9-NEXT: mfvsrwz r7, f8
-; CHECK-P9-NEXT: mfvsrwz r8, f9
-; CHECK-P9-NEXT: mfvsrwz r9, f10
-; CHECK-P9-NEXT: mfvsrwz r10, f11
-; CHECK-P9-NEXT: mfvsrwz r29, f12
-; CHECK-P9-NEXT: mfvsrwz r28, f13
-; CHECK-P9-NEXT: mfvsrwz r27, v2
-; CHECK-P9-NEXT: mfvsrwz r26, v3
+; CHECK-P9-NEXT: mfvsrwz r3, f1
+; CHECK-P9-NEXT: mtvsrd f1, r3
+; CHECK-P9-NEXT: mfvsrwz r3, f0
; CHECK-P9-NEXT: mtvsrd f0, r3
-; CHECK-P9-NEXT: mtvsrd f1, r4
-; CHECK-P9-NEXT: mtvsrd f2, r5
-; CHECK-P9-NEXT: mtvsrd f3, r6
-; CHECK-P9-NEXT: mtvsrd f8, r11
-; CHECK-P9-NEXT: mtvsrd f9, r12
-; CHECK-P9-NEXT: mtvsrd f10, r0
-; CHECK-P9-NEXT: mtvsrd f11, r30
-; CHECK-P9-NEXT: mtvsrd f4, r7
-; CHECK-P9-NEXT: mtvsrd f5, r8
-; CHECK-P9-NEXT: mtvsrd f6, r9
-; CHECK-P9-NEXT: mtvsrd f7, r10
-; CHECK-P9-NEXT: mtvsrd f12, r29
-; CHECK-P9-NEXT: mtvsrd f13, r28
-; CHECK-P9-NEXT: mtvsrd v2, r27
-; CHECK-P9-NEXT: mtvsrd v3, r26
-; CHECK-P9-NEXT: xxswapd v4, vs0
+; CHECK-P9-NEXT: vmrglb v4, v4, v5
; CHECK-P9-NEXT: xxswapd v5, vs1
-; CHECK-P9-NEXT: xxswapd v0, vs2
-; CHECK-P9-NEXT: xxswapd v1, vs3
-; CHECK-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v6, vs4
-; CHECK-P9-NEXT: xxswapd v7, vs5
-; CHECK-P9-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT: xxswapd v8, vs6
-; CHECK-P9-NEXT: xxswapd v9, vs7
-; CHECK-P9-NEXT: xxswapd v10, vs8
-; CHECK-P9-NEXT: xxswapd v11, vs12
-; CHECK-P9-NEXT: xxswapd v12, vs9
-; CHECK-P9-NEXT: xxswapd v13, vs13
-; CHECK-P9-NEXT: xxswapd v14, vs10
-; CHECK-P9-NEXT: xxswapd v2, v2
-; CHECK-P9-NEXT: xxswapd v15, vs11
-; CHECK-P9-NEXT: xxswapd v3, v3
-; CHECK-P9-NEXT: vmrglb v4, v4, v6
-; CHECK-P9-NEXT: vmrglb v5, v5, v7
-; CHECK-P9-NEXT: vmrglb v0, v0, v8
-; CHECK-P9-NEXT: vmrglb v1, v1, v9
-; CHECK-P9-NEXT: vmrglb v6, v10, v11
-; CHECK-P9-NEXT: vmrglb v7, v12, v13
-; CHECK-P9-NEXT: vmrglb v2, v14, v2
-; CHECK-P9-NEXT: vmrglb v3, v15, v3
+; CHECK-P9-NEXT: xxswapd v0, vs0
+; CHECK-P9-NEXT: vmrglb v5, v5, v0
; CHECK-P9-NEXT: vmrglh v4, v5, v4
-; CHECK-P9-NEXT: vmrglh v5, v1, v0
-; CHECK-P9-NEXT: vmrglh v0, v7, v6
-; CHECK-P9-NEXT: vmrglh v2, v3, v2
-; CHECK-P9-NEXT: vmrglw v3, v5, v4
-; CHECK-P9-NEXT: vmrglw v2, v2, v0
-; CHECK-P9-NEXT: xxmrgld v2, v2, v3
+; CHECK-P9-NEXT: vmrglw v3, v4, v3
+; CHECK-P9-NEXT: xxmrgld v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs2, 64(r3)
-; CHECK-BE-NEXT: lxv vs3, 80(r3)
-; CHECK-BE-NEXT: lxv vs4, 96(r3)
-; CHECK-BE-NEXT: lxv vs5, 112(r3)
+; CHECK-BE-NEXT: lxv vs7, 112(r3)
+; CHECK-BE-NEXT: xscvdpsxws f8, f7
+; CHECK-BE-NEXT: xxswapd vs7, vs7
+; CHECK-BE-NEXT: xscvdpsxws f7, f7
+; CHECK-BE-NEXT: lxv vs6, 96(r3)
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
-; CHECK-BE-NEXT: lxv vs6, 32(r3)
-; CHECK-BE-NEXT: lxv vs7, 48(r3)
-; CHECK-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: xxswapd vs8, vs5
-; CHECK-BE-NEXT: xxswapd vs9, vs4
-; CHECK-BE-NEXT: xxswapd vs10, vs3
-; CHECK-BE-NEXT: xxswapd vs11, vs2
-; CHECK-BE-NEXT: xxswapd vs12, vs7
-; CHECK-BE-NEXT: xxswapd vs13, vs6
-; CHECK-BE-NEXT: xxswapd v2, vs1
-; CHECK-BE-NEXT: xxswapd v3, vs0
+; CHECK-BE-NEXT: lxv vs2, 32(r3)
+; CHECK-BE-NEXT: lxv vs3, 48(r3)
+; CHECK-BE-NEXT: lxv vs4, 64(r3)
+; CHECK-BE-NEXT: lxv vs5, 80(r3)
+; CHECK-BE-NEXT: mfvsrwz r3, f8
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f7
+; CHECK-BE-NEXT: xscvdpsxws f7, f6
+; CHECK-BE-NEXT: xxswapd vs6, vs6
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: xscvdpsxws f6, f6
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-NEXT: mfvsrwz r3, f7
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f6
+; CHECK-BE-NEXT: xscvdpsxws f6, f5
+; CHECK-BE-NEXT: xxswapd vs5, vs5
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f5, f5
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f6
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f5
+; CHECK-BE-NEXT: xscvdpsxws f5, f4
+; CHECK-BE-NEXT: xxswapd vs4, vs4
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f4, f4
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f5
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: xscvdpsxws f4, f3
+; CHECK-BE-NEXT: xxswapd vs3, vs3
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f3, f3
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f4
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: xscvdpsxws f3, f2
+; CHECK-BE-NEXT: xxswapd vs2, vs2
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f2, f2
-; CHECK-BE-NEXT: xscvdpsxws f7, f7
-; CHECK-BE-NEXT: xscvdpsxws f6, f6
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: vmrghb v3, v3, v4
+; CHECK-BE-NEXT: mfvsrwz r3, f3
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: xscvdpsxws f2, f1
+; CHECK-BE-NEXT: xxswapd vs1, vs1
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f1, f1
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f2
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: vmrghh v3, v4, v3
+; CHECK-BE-NEXT: mtvsrd v4, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f1
+; CHECK-BE-NEXT: xscvdpsxws f1, f0
+; CHECK-BE-NEXT: xxswapd vs0, vs0
+; CHECK-BE-NEXT: sldi r3, r3, 56
; CHECK-BE-NEXT: xscvdpsxws f0, f0
-; CHECK-BE-NEXT: xscvdpsxws f8, f8
-; CHECK-BE-NEXT: xscvdpsxws f9, f9
-; CHECK-BE-NEXT: xscvdpsxws f10, f10
-; CHECK-BE-NEXT: xscvdpsxws f11, f11
-; CHECK-BE-NEXT: xscvdpsxws f12, f12
-; CHECK-BE-NEXT: xscvdpsxws f13, f13
-; CHECK-BE-NEXT: xscvdpsxws v2, v2
-; CHECK-BE-NEXT: xscvdpsxws v3, v3
-; CHECK-BE-NEXT: mfvsrwz r3, f5
-; CHECK-BE-NEXT: mfvsrwz r4, f4
-; CHECK-BE-NEXT: mfvsrwz r5, f3
-; CHECK-BE-NEXT: mfvsrwz r6, f2
-; CHECK-BE-NEXT: mfvsrwz r11, f7
-; CHECK-BE-NEXT: mfvsrwz r12, f6
-; CHECK-BE-NEXT: mfvsrwz r0, f1
-; CHECK-BE-NEXT: mfvsrwz r30, f0
-; CHECK-BE-NEXT: mfvsrwz r7, f8
-; CHECK-BE-NEXT: mfvsrwz r8, f9
-; CHECK-BE-NEXT: mfvsrwz r9, f10
-; CHECK-BE-NEXT: mfvsrwz r10, f11
-; CHECK-BE-NEXT: mfvsrwz r29, f12
-; CHECK-BE-NEXT: mfvsrwz r28, f13
-; CHECK-BE-NEXT: mfvsrwz r27, v2
-; CHECK-BE-NEXT: mfvsrwz r26, v3
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: vmrghb v4, v4, v5
+; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: sldi r3, r3, 56
-; CHECK-BE-NEXT: sldi r4, r4, 56
-; CHECK-BE-NEXT: sldi r5, r5, 56
-; CHECK-BE-NEXT: sldi r6, r6, 56
-; CHECK-BE-NEXT: sldi r11, r11, 56
-; CHECK-BE-NEXT: sldi r12, r12, 56
-; CHECK-BE-NEXT: sldi r0, r0, 56
-; CHECK-BE-NEXT: sldi r30, r30, 56
-; CHECK-BE-NEXT: sldi r7, r7, 56
-; CHECK-BE-NEXT: sldi r8, r8, 56
-; CHECK-BE-NEXT: sldi r9, r9, 56
-; CHECK-BE-NEXT: sldi r10, r10, 56
-; CHECK-BE-NEXT: sldi r29, r29, 56
-; CHECK-BE-NEXT: sldi r28, r28, 56
-; CHECK-BE-NEXT: sldi r27, r27, 56
-; CHECK-BE-NEXT: sldi r26, r26, 56
-; CHECK-BE-NEXT: mtvsrd v2, r3
-; CHECK-BE-NEXT: mtvsrd v3, r4
-; CHECK-BE-NEXT: mtvsrd v4, r5
-; CHECK-BE-NEXT: mtvsrd v5, r6
-; CHECK-BE-NEXT: mtvsrd v8, r11
-; CHECK-BE-NEXT: mtvsrd v10, r12
-; CHECK-BE-NEXT: mtvsrd v12, r0
-; CHECK-BE-NEXT: mtvsrd v14, r30
-; CHECK-BE-NEXT: mtvsrd v0, r7
-; CHECK-BE-NEXT: mtvsrd v1, r8
-; CHECK-BE-NEXT: mtvsrd v6, r9
-; CHECK-BE-NEXT: mtvsrd v7, r10
-; CHECK-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v9, r29
-; CHECK-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: mtvsrd v11, r28
-; CHECK-BE-NEXT: mtvsrd v13, r27
-; CHECK-BE-NEXT: mtvsrd v15, r26
-; CHECK-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: vmrghb v2, v2, v0
-; CHECK-BE-NEXT: vmrghb v3, v3, v1
-; CHECK-BE-NEXT: vmrghb v4, v4, v6
-; CHECK-BE-NEXT: vmrghb v5, v5, v7
-; CHECK-BE-NEXT: vmrghb v0, v8, v9
-; CHECK-BE-NEXT: vmrghb v1, v10, v11
-; CHECK-BE-NEXT: vmrghb v6, v12, v13
-; CHECK-BE-NEXT: vmrghb v7, v14, v15
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
-; CHECK-BE-NEXT: vmrghh v4, v1, v0
-; CHECK-BE-NEXT: vmrghh v5, v7, v6
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: vmrghw v3, v5, v4
+; CHECK-BE-NEXT: mtvsrd v5, r3
+; CHECK-BE-NEXT: mfvsrwz r3, f0
+; CHECK-BE-NEXT: sldi r3, r3, 56
+; CHECK-BE-NEXT: mtvsrd v0, r3
+; CHECK-BE-NEXT: vmrghb v5, v5, v0
+; CHECK-BE-NEXT: vmrghh v4, v5, v4
+; CHECK-BE-NEXT: vmrghw v3, v4, v3
; CHECK-BE-NEXT: xxmrghd v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 48(r4)
-; CHECK-P9-NEXT: lxv vs1, 32(r4)
-; CHECK-P9-NEXT: lxv vs2, 16(r4)
-; CHECK-P9-NEXT: lxv vs3, 0(r4)
-; CHECK-P9-NEXT: lxv vs4, 112(r4)
-; CHECK-P9-NEXT: lxv vs5, 96(r4)
-; CHECK-P9-NEXT: lxv vs6, 80(r4)
-; CHECK-P9-NEXT: lxv vs7, 64(r4)
+; CHECK-P9-NEXT: lxv vs0, 112(r4)
+; CHECK-P9-NEXT: lxv vs1, 96(r4)
+; CHECK-P9-NEXT: lxv vs2, 80(r4)
+; CHECK-P9-NEXT: lxv vs3, 64(r4)
+; CHECK-P9-NEXT: lxv vs4, 48(r4)
+; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT: lxv vs5, 32(r4)
+; CHECK-P9-NEXT: lxv vs6, 16(r4)
+; CHECK-P9-NEXT: lxv vs7, 0(r4)
+; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
-; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
-; CHECK-P9-NEXT: stxv vs0, 48(r3)
-; CHECK-P9-NEXT: stxv vs1, 32(r3)
-; CHECK-P9-NEXT: stxv vs2, 16(r3)
-; CHECK-P9-NEXT: stxv vs3, 0(r3)
-; CHECK-P9-NEXT: stxv vs4, 112(r3)
-; CHECK-P9-NEXT: stxv vs5, 96(r3)
-; CHECK-P9-NEXT: stxv vs6, 80(r3)
-; CHECK-P9-NEXT: stxv vs7, 64(r3)
+; CHECK-P9-NEXT: stxv vs0, 112(r3)
+; CHECK-P9-NEXT: stxv vs1, 96(r3)
+; CHECK-P9-NEXT: stxv vs2, 80(r3)
+; CHECK-P9-NEXT: stxv vs3, 64(r3)
+; CHECK-P9-NEXT: stxv vs4, 48(r3)
+; CHECK-P9-NEXT: stxv vs5, 32(r3)
+; CHECK-P9-NEXT: stxv vs6, 16(r3)
+; CHECK-P9-NEXT: stxv vs7, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 48(r4)
-; CHECK-BE-NEXT: lxv vs1, 32(r4)
-; CHECK-BE-NEXT: lxv vs2, 16(r4)
-; CHECK-BE-NEXT: lxv vs3, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 112(r4)
-; CHECK-BE-NEXT: lxv vs5, 96(r4)
-; CHECK-BE-NEXT: lxv vs6, 80(r4)
-; CHECK-BE-NEXT: lxv vs7, 64(r4)
+; CHECK-BE-NEXT: lxv vs0, 112(r4)
+; CHECK-BE-NEXT: lxv vs1, 96(r4)
+; CHECK-BE-NEXT: lxv vs2, 80(r4)
+; CHECK-BE-NEXT: lxv vs3, 64(r4)
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
+; CHECK-BE-NEXT: lxv vs6, 16(r4)
+; CHECK-BE-NEXT: lxv vs7, 0(r4)
+; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
+; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
+; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
-; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
-; CHECK-BE-NEXT: stxv vs0, 48(r3)
-; CHECK-BE-NEXT: stxv vs1, 32(r3)
-; CHECK-BE-NEXT: stxv vs2, 16(r3)
-; CHECK-BE-NEXT: stxv vs3, 0(r3)
-; CHECK-BE-NEXT: stxv vs4, 112(r3)
-; CHECK-BE-NEXT: stxv vs5, 96(r3)
-; CHECK-BE-NEXT: stxv vs6, 80(r3)
-; CHECK-BE-NEXT: stxv vs7, 64(r3)
+; CHECK-BE-NEXT: stxv vs0, 112(r3)
+; CHECK-BE-NEXT: stxv vs1, 96(r3)
+; CHECK-BE-NEXT: stxv vs2, 80(r3)
+; CHECK-BE-NEXT: stxv vs3, 64(r3)
+; CHECK-BE-NEXT: stxv vs4, 48(r3)
+; CHECK-BE-NEXT: stxv vs5, 32(r3)
+; CHECK-BE-NEXT: stxv vs6, 16(r3)
+; CHECK-BE-NEXT: stxv vs7, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 48(r4)
-; CHECK-P9-NEXT: lxv vs1, 32(r4)
-; CHECK-P9-NEXT: lxv vs2, 16(r4)
-; CHECK-P9-NEXT: lxv vs3, 0(r4)
-; CHECK-P9-NEXT: lxv vs4, 112(r4)
-; CHECK-P9-NEXT: lxv vs5, 96(r4)
-; CHECK-P9-NEXT: lxv vs6, 80(r4)
-; CHECK-P9-NEXT: lxv vs7, 64(r4)
+; CHECK-P9-NEXT: lxv vs0, 112(r4)
+; CHECK-P9-NEXT: lxv vs1, 96(r4)
+; CHECK-P9-NEXT: lxv vs2, 80(r4)
+; CHECK-P9-NEXT: lxv vs3, 64(r4)
+; CHECK-P9-NEXT: lxv vs4, 48(r4)
+; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
+; CHECK-P9-NEXT: lxv vs5, 32(r4)
+; CHECK-P9-NEXT: lxv vs6, 16(r4)
+; CHECK-P9-NEXT: lxv vs7, 0(r4)
+; CHECK-P9-NEXT: xvcvdpsxds vs7, vs7
+; CHECK-P9-NEXT: xvcvdpsxds vs6, vs6
+; CHECK-P9-NEXT: xvcvdpsxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
-; CHECK-P9-NEXT: xvcvdpsxds vs7, vs7
-; CHECK-P9-NEXT: xvcvdpsxds vs6, vs6
-; CHECK-P9-NEXT: xvcvdpsxds vs5, vs5
-; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
-; CHECK-P9-NEXT: stxv vs0, 48(r3)
-; CHECK-P9-NEXT: stxv vs1, 32(r3)
-; CHECK-P9-NEXT: stxv vs2, 16(r3)
-; CHECK-P9-NEXT: stxv vs3, 0(r3)
-; CHECK-P9-NEXT: stxv vs4, 112(r3)
-; CHECK-P9-NEXT: stxv vs5, 96(r3)
-; CHECK-P9-NEXT: stxv vs6, 80(r3)
-; CHECK-P9-NEXT: stxv vs7, 64(r3)
+; CHECK-P9-NEXT: stxv vs0, 112(r3)
+; CHECK-P9-NEXT: stxv vs1, 96(r3)
+; CHECK-P9-NEXT: stxv vs2, 80(r3)
+; CHECK-P9-NEXT: stxv vs3, 64(r3)
+; CHECK-P9-NEXT: stxv vs4, 48(r3)
+; CHECK-P9-NEXT: stxv vs5, 32(r3)
+; CHECK-P9-NEXT: stxv vs6, 16(r3)
+; CHECK-P9-NEXT: stxv vs7, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 48(r4)
-; CHECK-BE-NEXT: lxv vs1, 32(r4)
-; CHECK-BE-NEXT: lxv vs2, 16(r4)
-; CHECK-BE-NEXT: lxv vs3, 0(r4)
-; CHECK-BE-NEXT: lxv vs4, 112(r4)
-; CHECK-BE-NEXT: lxv vs5, 96(r4)
-; CHECK-BE-NEXT: lxv vs6, 80(r4)
-; CHECK-BE-NEXT: lxv vs7, 64(r4)
+; CHECK-BE-NEXT: lxv vs0, 112(r4)
+; CHECK-BE-NEXT: lxv vs1, 96(r4)
+; CHECK-BE-NEXT: lxv vs2, 80(r4)
+; CHECK-BE-NEXT: lxv vs3, 64(r4)
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
+; CHECK-BE-NEXT: lxv vs6, 16(r4)
+; CHECK-BE-NEXT: lxv vs7, 0(r4)
+; CHECK-BE-NEXT: xvcvdpsxds vs7, vs7
+; CHECK-BE-NEXT: xvcvdpsxds vs6, vs6
+; CHECK-BE-NEXT: xvcvdpsxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
-; CHECK-BE-NEXT: xvcvdpsxds vs7, vs7
-; CHECK-BE-NEXT: xvcvdpsxds vs6, vs6
-; CHECK-BE-NEXT: xvcvdpsxds vs5, vs5
-; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
-; CHECK-BE-NEXT: stxv vs0, 48(r3)
-; CHECK-BE-NEXT: stxv vs1, 32(r3)
-; CHECK-BE-NEXT: stxv vs2, 16(r3)
-; CHECK-BE-NEXT: stxv vs3, 0(r3)
-; CHECK-BE-NEXT: stxv vs4, 112(r3)
-; CHECK-BE-NEXT: stxv vs5, 96(r3)
-; CHECK-BE-NEXT: stxv vs6, 80(r3)
-; CHECK-BE-NEXT: stxv vs7, 64(r3)
+; CHECK-BE-NEXT: stxv vs0, 112(r3)
+; CHECK-BE-NEXT: stxv vs1, 96(r3)
+; CHECK-BE-NEXT: stxv vs2, 80(r3)
+; CHECK-BE-NEXT: stxv vs3, 64(r3)
+; CHECK-BE-NEXT: stxv vs4, 48(r3)
+; CHECK-BE-NEXT: stxv vs5, 32(r3)
+; CHECK-BE-NEXT: stxv vs6, 16(r3)
+; CHECK-BE-NEXT: stxv vs7, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: li r4, 2
; CHECK-P9-NEXT: vextuhrx r3, r3, v2
-; CHECK-P9-NEXT: vextuhrx r4, r4, v2
; CHECK-P9-NEXT: rlwinm r3, r3, 0, 16, 31
-; CHECK-P9-NEXT: rlwinm r4, r4, 0, 16, 31
; CHECK-P9-NEXT: mtvsrwz f0, r3
-; CHECK-P9-NEXT: mtvsrwz f1, r4
+; CHECK-P9-NEXT: li r3, 2
+; CHECK-P9-NEXT: xscvuxdsp f0, f0
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: vextuhrx r3, r3, v2
+; CHECK-P9-NEXT: rlwinm r3, r3, 0, 16, 31
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT: mtvsrwz f0, r3
; CHECK-P9-NEXT: xscvuxdsp f0, f0
-; CHECK-P9-NEXT: xscvuxdsp f1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
; CHECK-BE-NEXT: li r3, 2
-; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: vextuhlx r3, r3, v2
-; CHECK-BE-NEXT: vextuhlx r4, r4, v2
; CHECK-BE-NEXT: rlwinm r3, r3, 0, 16, 31
-; CHECK-BE-NEXT: rlwinm r4, r4, 0, 16, 31
; CHECK-BE-NEXT: mtvsrwz f0, r3
-; CHECK-BE-NEXT: mtvsrwz f1, r4
+; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: xscvuxdsp f0, f0
+; CHECK-BE-NEXT: vextuhlx r3, r3, v2
+; CHECK-BE-NEXT: rlwinm r3, r3, 0, 16, 31
+; CHECK-BE-NEXT: xscvdpspn v3, f0
+; CHECK-BE-NEXT: mtvsrwz f0, r3
; CHECK-BE-NEXT: xscvuxdsp f0, f0
-; CHECK-BE-NEXT: xscvuxdsp f1, f1
; CHECK-BE-NEXT: xscvdpspn v2, f0
-; CHECK-BE-NEXT: xscvdpspn v3, f1
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r3
+; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r3
+; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xxlxor v4, v4, v4
-; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l
-; CHECK-P9-NEXT: xxswapd v3, vs0
-; CHECK-P9-NEXT: lxvx v2, 0, r4
-; CHECK-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxwsp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT: mtvsrd v3, r3
+; CHECK-BE-NEXT: mtvsrd v2, r3
+; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r3
; CHECK-BE-NEXT: xxlxor v4, v4, v4
-; CHECK-BE-NEXT: addi r4, r4, .LCPI1_0@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r4
-; CHECK-BE-NEXT: vperm v2, v3, v4, v2
+; CHECK-BE-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxwsp v2, v2
; CHECK-BE-NEXT: blr
entry:
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT: addis r5, r2, .LCPI2_1@toc@ha
-; CHECK-P9-NEXT: xxlxor v5, v5, v5
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P9-NEXT: addi r5, r5, .LCPI2_1@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: vperm v3, v5, v2, v3
-; CHECK-P9-NEXT: vperm v2, v5, v2, v4
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxwsp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: xvcvuxwsp vs1, v2
; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT: addis r5, r2, .LCPI2_1@toc@ha
-; CHECK-BE-NEXT: xxlxor v5, v5, v5
; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-BE-NEXT: addi r5, r5, .LCPI2_1@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: vperm v3, v2, v5, v3
-; CHECK-BE-NEXT: vperm v2, v5, v2, v4
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxwsp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: xvcvuxwsp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <8 x i16> %a to <8 x float>
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI3_1@toc@ha
; CHECK-P9-NEXT: lxv v2, 16(r4)
; CHECK-P9-NEXT: lxv v3, 0(r4)
-; CHECK-P9-NEXT: xxlxor v0, v0, v0
-; CHECK-P9-NEXT: addi r5, r5, .LCPI3_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI3_1@toc@l
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: lxvx v5, 0, r6
-; CHECK-P9-NEXT: vperm v1, v0, v3, v4
-; CHECK-P9-NEXT: vperm v3, v0, v3, v5
-; CHECK-P9-NEXT: vperm v4, v0, v2, v4
-; CHECK-P9-NEXT: vperm v2, v0, v2, v5
-; CHECK-P9-NEXT: xvcvuxwsp vs0, v1
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P9-NEXT: lxvx v4, 0, r4
+; CHECK-P9-NEXT: xxlxor v5, v5, v5
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P9-NEXT: vperm v0, v5, v3, v4
+; CHECK-P9-NEXT: xvcvuxwsp vs0, v0
+; CHECK-P9-NEXT: lxvx v0, 0, r4
+; CHECK-P9-NEXT: vperm v3, v5, v3, v0
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: xvcvuxwsp vs1, v3
-; CHECK-P9-NEXT: xvcvuxwsp vs2, v4
+; CHECK-P9-NEXT: vperm v3, v5, v2, v4
+; CHECK-P9-NEXT: vperm v2, v5, v2, v0
+; CHECK-P9-NEXT: xvcvuxwsp vs2, v3
; CHECK-P9-NEXT: xvcvuxwsp vs3, v2
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI3_1@toc@ha
; CHECK-BE-NEXT: lxv v2, 16(r4)
; CHECK-BE-NEXT: lxv v3, 0(r4)
-; CHECK-BE-NEXT: xxlxor v0, v0, v0
-; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI3_1@toc@l
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: vperm v1, v3, v0, v4
-; CHECK-BE-NEXT: vperm v3, v0, v3, v5
-; CHECK-BE-NEXT: vperm v4, v2, v0, v4
-; CHECK-BE-NEXT: vperm v2, v0, v2, v5
-; CHECK-BE-NEXT: xvcvuxwsp vs0, v1
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: xxlxor v5, v5, v5
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-BE-NEXT: vperm v0, v3, v5, v4
+; CHECK-BE-NEXT: xvcvuxwsp vs0, v0
+; CHECK-BE-NEXT: lxvx v0, 0, r4
+; CHECK-BE-NEXT: vperm v3, v5, v3, v0
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: xvcvuxwsp vs1, v3
-; CHECK-BE-NEXT: xvcvuxwsp vs2, v4
+; CHECK-BE-NEXT: vperm v3, v2, v5, v4
+; CHECK-BE-NEXT: vperm v2, v5, v2, v0
+; CHECK-BE-NEXT: xvcvuxwsp vs2, v3
; CHECK-BE-NEXT: xvcvuxwsp vs3, v2
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i16>, <16 x i16>* %0, align 32
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: li r4, 2
; CHECK-P9-NEXT: vextuhrx r3, r3, v2
-; CHECK-P9-NEXT: vextuhrx r4, r4, v2
; CHECK-P9-NEXT: extsh r3, r3
-; CHECK-P9-NEXT: extsh r4, r4
; CHECK-P9-NEXT: mtvsrwa f0, r3
-; CHECK-P9-NEXT: mtvsrwa f1, r4
+; CHECK-P9-NEXT: li r3, 2
+; CHECK-P9-NEXT: xscvsxdsp f0, f0
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: vextuhrx r3, r3, v2
+; CHECK-P9-NEXT: extsh r3, r3
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT: mtvsrwa f0, r3
; CHECK-P9-NEXT: xscvsxdsp f0, f0
-; CHECK-P9-NEXT: xscvsxdsp f1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
; CHECK-BE-NEXT: li r3, 2
-; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: vextuhlx r3, r3, v2
-; CHECK-BE-NEXT: vextuhlx r4, r4, v2
; CHECK-BE-NEXT: extsh r3, r3
-; CHECK-BE-NEXT: extsh r4, r4
; CHECK-BE-NEXT: mtvsrwa f0, r3
-; CHECK-BE-NEXT: mtvsrwa f1, r4
+; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: xscvsxdsp f0, f0
+; CHECK-BE-NEXT: vextuhlx r3, r3, v2
+; CHECK-BE-NEXT: extsh r3, r3
+; CHECK-BE-NEXT: xscvdpspn v3, f0
+; CHECK-BE-NEXT: mtvsrwa f0, r3
; CHECK-BE-NEXT: xscvsxdsp f0, f0
-; CHECK-BE-NEXT: xscvsxdsp f1, f1
; CHECK-BE-NEXT: xscvdpspn v2, f0
-; CHECK-BE-NEXT: xscvdpspn v3, f1
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT: xxlxor v4, v4, v4
; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
; CHECK-BE-NEXT: vperm v3, v4, v2, v3
; CHECK-BE-NEXT: vmrghh v2, v2, v2
; CHECK-BE-NEXT: vextsh2w v3, v3
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 16(r4)
; CHECK-P9-NEXT: lxv v3, 0(r4)
+; CHECK-P9-NEXT: lxv v2, 16(r4)
; CHECK-P9-NEXT: vmrglh v4, v3, v3
; CHECK-P9-NEXT: vmrghh v3, v3, v3
-; CHECK-P9-NEXT: vmrglh v5, v2, v2
+; CHECK-P9-NEXT: vextsh2w v3, v3
+; CHECK-P9-NEXT: xvcvsxwsp vs1, v3
+; CHECK-P9-NEXT: vmrglh v3, v2, v2
; CHECK-P9-NEXT: vmrghh v2, v2, v2
; CHECK-P9-NEXT: vextsh2w v4, v4
+; CHECK-P9-NEXT: xvcvsxwsp vs0, v4
; CHECK-P9-NEXT: vextsh2w v3, v3
-; CHECK-P9-NEXT: vextsh2w v5, v5
; CHECK-P9-NEXT: vextsh2w v2, v2
-; CHECK-P9-NEXT: xvcvsxwsp vs0, v4
-; CHECK-P9-NEXT: xvcvsxwsp vs1, v3
-; CHECK-P9-NEXT: xvcvsxwsp vs2, v5
+; CHECK-P9-NEXT: xvcvsxwsp vs2, v3
; CHECK-P9-NEXT: xvcvsxwsp vs3, v2
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha
; CHECK-BE-NEXT: lxv v2, 16(r4)
; CHECK-BE-NEXT: lxv v3, 0(r4)
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l
+; CHECK-BE-NEXT: lxvx v4, 0, r4
; CHECK-BE-NEXT: xxlxor v5, v5, v5
-; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l
-; CHECK-BE-NEXT: lxvx v4, 0, r5
; CHECK-BE-NEXT: vperm v0, v5, v3, v4
; CHECK-BE-NEXT: vperm v4, v5, v2, v4
; CHECK-BE-NEXT: vmrghh v3, v3, v3
; CHECK-BE-NEXT: vmrghh v2, v2, v2
-; CHECK-BE-NEXT: vextsh2w v5, v0
+; CHECK-BE-NEXT: vextsh2w v0, v0
; CHECK-BE-NEXT: vextsh2w v4, v4
; CHECK-BE-NEXT: vextsh2w v3, v3
; CHECK-BE-NEXT: vextsh2w v2, v2
-; CHECK-BE-NEXT: xvcvsxwsp vs0, v5
+; CHECK-BE-NEXT: xvcvsxwsp vs0, v0
; CHECK-BE-NEXT: xvcvsxwsp vs1, v4
; CHECK-BE-NEXT: xvcvsxwsp vs2, v3
; CHECK-BE-NEXT: xvcvsxwsp vs3, v2
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-P9-NEXT: mtvsrws v3, r3
+; CHECK-P9-NEXT: mtvsrws v2, r3
+; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: xxlxor v4, v4, v4
-; CHECK-P9-NEXT: addi r4, r4, .LCPI0_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r4
-; CHECK-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT: mtvsrws v3, r3
+; CHECK-BE-NEXT: mtvsrws v2, r3
+; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r3
; CHECK-BE-NEXT: xxlxor v4, v4, v4
-; CHECK-BE-NEXT: addi r4, r4, .LCPI0_0@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r4
-; CHECK-BE-NEXT: vperm v2, v3, v4, v2
+; CHECK-BE-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxddp v2, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI1_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI1_1@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
-; CHECK-P9-NEXT: xxlxor v5, v5, v5
-; CHECK-P9-NEXT: addi r5, r5, .LCPI1_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI1_1@toc@l
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: vperm v2, v5, v4, v2
-; CHECK-P9-NEXT: vperm v3, v5, v4, v3
-; CHECK-P9-NEXT: xvcvuxddp vs0, v2
-; CHECK-P9-NEXT: xvcvuxddp vs1, v3
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI1_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: xvcvuxddp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs1, v2
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-BE-NEXT: mtvsrd v4, r4
-; CHECK-BE-NEXT: xxlxor v5, v5, v5
-; CHECK-BE-NEXT: addi r5, r5, .LCPI1_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI1_1@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: vperm v2, v4, v5, v2
-; CHECK-BE-NEXT: vperm v3, v5, v4, v3
-; CHECK-BE-NEXT: xvcvuxddp vs0, v2
-; CHECK-BE-NEXT: xvcvuxddp vs1, v3
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI1_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI1_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
+; CHECK-BE-NEXT: xvcvuxddp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs1, v2
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <4 x i16>
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT: addis r5, r2, .LCPI2_1@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI2_2@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI2_3@toc@ha
-; CHECK-P9-NEXT: xxlxor v1, v1, v1
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P9-NEXT: addi r5, r5, .LCPI2_1@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI2_2@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI2_3@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: lxvx v5, 0, r6
-; CHECK-P9-NEXT: lxvx v0, 0, r7
-; CHECK-P9-NEXT: vperm v3, v1, v2, v3
-; CHECK-P9-NEXT: vperm v4, v1, v2, v4
-; CHECK-P9-NEXT: vperm v5, v1, v2, v5
-; CHECK-P9-NEXT: vperm v2, v1, v2, v0
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxddp vs0, v3
-; CHECK-P9-NEXT: xvcvuxddp vs1, v4
-; CHECK-P9-NEXT: xvcvuxddp vs2, v5
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs1, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: xvcvuxddp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT: addis r5, r2, .LCPI2_1@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI2_2@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI2_3@toc@ha
-; CHECK-BE-NEXT: xxlxor v1, v1, v1
; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-BE-NEXT: addi r5, r5, .LCPI2_1@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI2_2@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI2_3@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: lxvx v0, 0, r7
-; CHECK-BE-NEXT: vperm v3, v2, v1, v3
-; CHECK-BE-NEXT: vperm v4, v1, v2, v4
-; CHECK-BE-NEXT: vperm v5, v1, v2, v5
-; CHECK-BE-NEXT: vperm v2, v1, v2, v0
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxddp vs0, v3
-; CHECK-BE-NEXT: xvcvuxddp vs1, v4
-; CHECK-BE-NEXT: xvcvuxddp vs2, v5
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_2@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs1, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_3@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs2, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: xvcvuxddp vs3, v2
; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <8 x i16> %a to <8 x double>
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI3_1@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI3_2@toc@ha
-; CHECK-P9-NEXT: addis r8, r2, .LCPI3_3@toc@ha
-; CHECK-P9-NEXT: lxv v0, 0(r4)
-; CHECK-P9-NEXT: lxv v1, 16(r4)
-; CHECK-P9-NEXT: xxlxor v6, v6, v6
-; CHECK-P9-NEXT: addi r5, r5, .LCPI3_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI3_1@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI3_2@toc@l
-; CHECK-P9-NEXT: addi r8, r8, .LCPI3_3@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: lxvx v4, 0, r7
-; CHECK-P9-NEXT: lxvx v5, 0, r8
-; CHECK-P9-NEXT: vperm v7, v6, v0, v2
-; CHECK-P9-NEXT: vperm v8, v6, v0, v3
-; CHECK-P9-NEXT: vperm v9, v6, v0, v4
-; CHECK-P9-NEXT: vperm v0, v6, v0, v5
-; CHECK-P9-NEXT: vperm v2, v6, v1, v2
-; CHECK-P9-NEXT: vperm v3, v6, v1, v3
-; CHECK-P9-NEXT: vperm v4, v6, v1, v4
-; CHECK-P9-NEXT: vperm v5, v6, v1, v5
-; CHECK-P9-NEXT: xvcvuxddp vs0, v7
-; CHECK-P9-NEXT: xvcvuxddp vs1, v8
-; CHECK-P9-NEXT: xvcvuxddp vs2, v9
-; CHECK-P9-NEXT: xvcvuxddp vs3, v0
-; CHECK-P9-NEXT: xvcvuxddp vs4, v2
+; CHECK-P9-NEXT: lxv v2, 16(r4)
+; CHECK-P9-NEXT: lxv v3, 0(r4)
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P9-NEXT: lxvx v4, 0, r4
+; CHECK-P9-NEXT: xxlxor v5, v5, v5
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P9-NEXT: vperm v0, v5, v3, v4
+; CHECK-P9-NEXT: xvcvuxddp vs0, v0
+; CHECK-P9-NEXT: lxvx v0, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P9-NEXT: vperm v1, v5, v3, v0
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs1, v1
+; CHECK-P9-NEXT: lxvx v1, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P9-NEXT: vperm v6, v5, v3, v1
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs2, v6
+; CHECK-P9-NEXT: lxvx v6, 0, r4
+; CHECK-P9-NEXT: vperm v3, v5, v3, v6
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs3, v3
+; CHECK-P9-NEXT: vperm v3, v5, v2, v4
+; CHECK-P9-NEXT: xvcvuxddp vs4, v3
+; CHECK-P9-NEXT: vperm v3, v5, v2, v0
; CHECK-P9-NEXT: xvcvuxddp vs5, v3
-; CHECK-P9-NEXT: xvcvuxddp vs6, v4
-; CHECK-P9-NEXT: xvcvuxddp vs7, v5
+; CHECK-P9-NEXT: vperm v3, v5, v2, v1
+; CHECK-P9-NEXT: vperm v2, v5, v2, v6
; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs6, v3
+; CHECK-P9-NEXT: xvcvuxddp vs7, v2
+; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
-; CHECK-P9-NEXT: stxv vs5, 80(r3)
-; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI3_1@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI3_2@toc@ha
-; CHECK-BE-NEXT: addis r8, r2, .LCPI3_3@toc@ha
-; CHECK-BE-NEXT: lxv v0, 0(r4)
-; CHECK-BE-NEXT: lxv v1, 16(r4)
-; CHECK-BE-NEXT: xxlxor v6, v6, v6
-; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI3_1@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI3_2@toc@l
-; CHECK-BE-NEXT: addi r8, r8, .LCPI3_3@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: lxvx v4, 0, r7
-; CHECK-BE-NEXT: lxvx v5, 0, r8
-; CHECK-BE-NEXT: vperm v7, v0, v6, v2
-; CHECK-BE-NEXT: vperm v8, v6, v0, v3
-; CHECK-BE-NEXT: vperm v9, v6, v0, v4
-; CHECK-BE-NEXT: vperm v0, v6, v0, v5
-; CHECK-BE-NEXT: vperm v2, v1, v6, v2
-; CHECK-BE-NEXT: vperm v3, v6, v1, v3
-; CHECK-BE-NEXT: vperm v4, v6, v1, v4
-; CHECK-BE-NEXT: vperm v5, v6, v1, v5
-; CHECK-BE-NEXT: xvcvuxddp vs0, v7
-; CHECK-BE-NEXT: xvcvuxddp vs1, v8
-; CHECK-BE-NEXT: xvcvuxddp vs2, v9
-; CHECK-BE-NEXT: xvcvuxddp vs3, v0
-; CHECK-BE-NEXT: xvcvuxddp vs4, v2
+; CHECK-BE-NEXT: lxv v2, 16(r4)
+; CHECK-BE-NEXT: lxv v3, 0(r4)
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: xxlxor v5, v5, v5
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-BE-NEXT: vperm v0, v3, v5, v4
+; CHECK-BE-NEXT: xvcvuxddp vs0, v0
+; CHECK-BE-NEXT: lxvx v0, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_2@toc@l
+; CHECK-BE-NEXT: vperm v1, v5, v3, v0
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs1, v1
+; CHECK-BE-NEXT: lxvx v1, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_3@toc@l
+; CHECK-BE-NEXT: vperm v6, v5, v3, v1
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs2, v6
+; CHECK-BE-NEXT: lxvx v6, 0, r4
+; CHECK-BE-NEXT: vperm v3, v5, v3, v6
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs3, v3
+; CHECK-BE-NEXT: vperm v3, v2, v5, v4
+; CHECK-BE-NEXT: xvcvuxddp vs4, v3
+; CHECK-BE-NEXT: vperm v3, v5, v2, v0
; CHECK-BE-NEXT: xvcvuxddp vs5, v3
-; CHECK-BE-NEXT: xvcvuxddp vs6, v4
-; CHECK-BE-NEXT: xvcvuxddp vs7, v5
+; CHECK-BE-NEXT: vperm v3, v5, v2, v1
+; CHECK-BE-NEXT: vperm v2, v5, v2, v6
; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs6, v3
+; CHECK-BE-NEXT: xvcvuxddp vs7, v2
+; CHECK-BE-NEXT: stxv vs4, 64(r3)
+; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
-; CHECK-BE-NEXT: stxv vs5, 80(r3)
-; CHECK-BE-NEXT: stxv vs4, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i16>, <16 x i16>* %0, align 32
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r4, r2, .LCPI4_0@toc@ha
-; CHECK-P9-NEXT: mtvsrws v3, r3
-; CHECK-P9-NEXT: addi r4, r4, .LCPI4_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r4
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
+; CHECK-P9-NEXT: mtvsrws v2, r3
+; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r3
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: xvcvsxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r4, r2, .LCPI4_0@toc@ha
-; CHECK-BE-NEXT: mtvsrws v3, r3
-; CHECK-BE-NEXT: addi r4, r4, .LCPI4_0@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r4
-; CHECK-BE-NEXT: vperm v2, v3, v3, v2
+; CHECK-BE-NEXT: mtvsrws v2, r3
+; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r3
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: vextsh2d v2, v2
; CHECK-BE-NEXT: xvcvsxddp v2, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI5_1@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
-; CHECK-P9-NEXT: addi r5, r5, .LCPI5_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI5_1@toc@l
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: vperm v2, v4, v4, v2
-; CHECK-P9-NEXT: vperm v3, v4, v4, v3
-; CHECK-P9-NEXT: vextsh2d v2, v2
+; CHECK-P9-NEXT: addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI5_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: addis r4, r2, .LCPI5_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI5_1@toc@l
; CHECK-P9-NEXT: vextsh2d v3, v3
-; CHECK-P9-NEXT: xvcvsxddp vs0, v2
-; CHECK-P9-NEXT: xvcvsxddp vs1, v3
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: xvcvsxddp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: vextsh2d v2, v2
+; CHECK-P9-NEXT: xvcvsxddp vs1, v2
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI5_1@toc@ha
-; CHECK-BE-NEXT: mtvsrd v4, r4
-; CHECK-BE-NEXT: xxlxor v5, v5, v5
-; CHECK-BE-NEXT: addi r5, r5, .LCPI5_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI5_1@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: vperm v2, v5, v4, v2
-; CHECK-BE-NEXT: vperm v3, v4, v4, v3
-; CHECK-BE-NEXT: vextsh2d v2, v2
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-NEXT: vperm v3, v3, v2, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-BE-NEXT: vextsh2d v3, v3
-; CHECK-BE-NEXT: xvcvsxddp vs0, v2
-; CHECK-BE-NEXT: xvcvsxddp vs1, v3
+; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l
+; CHECK-BE-NEXT: xvcvsxddp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: vextsh2d v2, v2
+; CHECK-BE-NEXT: xvcvsxddp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-P9-NEXT: addis r5, r2, .LCPI6_1@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI6_2@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI6_3@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l
-; CHECK-P9-NEXT: addi r5, r5, .LCPI6_1@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI6_2@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI6_3@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: lxvx v5, 0, r6
-; CHECK-P9-NEXT: lxvx v0, 0, r7
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
-; CHECK-P9-NEXT: vperm v4, v2, v2, v4
-; CHECK-P9-NEXT: vperm v5, v2, v2, v5
-; CHECK-P9-NEXT: vperm v2, v2, v2, v0
; CHECK-P9-NEXT: vextsh2d v3, v3
-; CHECK-P9-NEXT: vextsh2d v4, v4
-; CHECK-P9-NEXT: vextsh2d v5, v5
-; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: xvcvsxddp vs0, v3
-; CHECK-P9-NEXT: xvcvsxddp vs1, v4
-; CHECK-P9-NEXT: xvcvsxddp vs2, v5
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: vextsh2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs1, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: vextsh2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: xvcvsxddp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT: addis r5, r2, .LCPI6_1@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI6_2@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI6_3@toc@ha
-; CHECK-BE-NEXT: xxlxor v1, v1, v1
; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l
-; CHECK-BE-NEXT: addi r5, r5, .LCPI6_1@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI6_2@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI6_3@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: lxvx v0, 0, r7
-; CHECK-BE-NEXT: vperm v3, v1, v2, v3
-; CHECK-BE-NEXT: vperm v4, v1, v2, v4
-; CHECK-BE-NEXT: vperm v5, v2, v2, v5
-; CHECK-BE-NEXT: vperm v2, v2, v2, v0
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-BE-NEXT: vextsh2d v3, v3
-; CHECK-BE-NEXT: vextsh2d v4, v4
-; CHECK-BE-NEXT: vextsh2d v5, v5
-; CHECK-BE-NEXT: vextsh2d v2, v2
; CHECK-BE-NEXT: xvcvsxddp vs0, v3
-; CHECK-BE-NEXT: xvcvsxddp vs1, v4
-; CHECK-BE-NEXT: xvcvsxddp vs2, v5
-; CHECK-BE-NEXT: xvcvsxddp vs3, v2
-; CHECK-BE-NEXT: stxv vs1, 48(r3)
-; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_2@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: vextsh2d v3, v3
+; CHECK-BE-NEXT: xvcvsxddp vs1, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_3@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v2, v3
+; CHECK-BE-NEXT: stxv vs1, 48(r3)
+; CHECK-BE-NEXT: vextsh2d v3, v3
+; CHECK-BE-NEXT: xvcvsxddp vs2, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxv vs2, 0(r3)
+; CHECK-BE-NEXT: vextsh2d v2, v2
+; CHECK-BE-NEXT: xvcvsxddp vs3, v2
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = sitofp <8 x i16> %a to <8 x double>
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI7_1@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI7_2@toc@ha
-; CHECK-P9-NEXT: addis r8, r2, .LCPI7_3@toc@ha
-; CHECK-P9-NEXT: lxv v0, 0(r4)
-; CHECK-P9-NEXT: lxv v1, 16(r4)
; CHECK-P9-NEXT: addi r5, r5, .LCPI7_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI7_1@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI7_2@toc@l
-; CHECK-P9-NEXT: addi r8, r8, .LCPI7_3@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: lxvx v4, 0, r7
-; CHECK-P9-NEXT: lxvx v5, 0, r8
-; CHECK-P9-NEXT: vperm v6, v0, v0, v2
-; CHECK-P9-NEXT: vperm v7, v0, v0, v3
-; CHECK-P9-NEXT: vperm v8, v0, v0, v4
-; CHECK-P9-NEXT: vperm v0, v0, v0, v5
-; CHECK-P9-NEXT: vperm v2, v1, v1, v2
-; CHECK-P9-NEXT: vperm v3, v1, v1, v3
-; CHECK-P9-NEXT: vperm v4, v1, v1, v4
-; CHECK-P9-NEXT: vperm v5, v1, v1, v5
-; CHECK-P9-NEXT: vextsh2d v1, v6
-; CHECK-P9-NEXT: vextsh2d v6, v7
-; CHECK-P9-NEXT: vextsh2d v7, v8
-; CHECK-P9-NEXT: vextsh2d v0, v0
-; CHECK-P9-NEXT: vextsh2d v2, v2
-; CHECK-P9-NEXT: vextsh2d v3, v3
+; CHECK-P9-NEXT: lxv v2, 0(r4)
+; CHECK-P9-NEXT: lxvx v3, 0, r5
+; CHECK-P9-NEXT: addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT: addi r5, r5, .LCPI7_1@toc@l
+; CHECK-P9-NEXT: lxvx v5, 0, r5
+; CHECK-P9-NEXT: addis r5, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT: vperm v4, v2, v2, v3
+; CHECK-P9-NEXT: addi r5, r5, .LCPI7_2@toc@l
; CHECK-P9-NEXT: vextsh2d v4, v4
-; CHECK-P9-NEXT: vextsh2d v5, v5
-; CHECK-P9-NEXT: xvcvsxddp vs0, v1
-; CHECK-P9-NEXT: xvcvsxddp vs1, v6
-; CHECK-P9-NEXT: xvcvsxddp vs2, v7
-; CHECK-P9-NEXT: xvcvsxddp vs3, v0
-; CHECK-P9-NEXT: xvcvsxddp vs4, v2
-; CHECK-P9-NEXT: xvcvsxddp vs5, v3
-; CHECK-P9-NEXT: xvcvsxddp vs6, v4
-; CHECK-P9-NEXT: xvcvsxddp vs7, v5
+; CHECK-P9-NEXT: lxvx v0, 0, r5
+; CHECK-P9-NEXT: addis r5, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT: xvcvsxddp vs0, v4
+; CHECK-P9-NEXT: vperm v4, v2, v2, v5
+; CHECK-P9-NEXT: addi r5, r5, .LCPI7_3@toc@l
+; CHECK-P9-NEXT: lxvx v1, 0, r5
+; CHECK-P9-NEXT: vextsh2d v4, v4
+; CHECK-P9-NEXT: xvcvsxddp vs1, v4
+; CHECK-P9-NEXT: vperm v4, v2, v2, v0
+; CHECK-P9-NEXT: vperm v2, v2, v2, v1
+; CHECK-P9-NEXT: vextsh2d v4, v4
+; CHECK-P9-NEXT: xvcvsxddp vs2, v4
+; CHECK-P9-NEXT: lxv v4, 16(r4)
+; CHECK-P9-NEXT: vextsh2d v2, v2
+; CHECK-P9-NEXT: xvcvsxddp vs3, v2
+; CHECK-P9-NEXT: vperm v2, v4, v4, v3
+; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: xvcvsxddp vs4, v2
+; CHECK-P9-NEXT: vperm v2, v4, v4, v5
+; CHECK-P9-NEXT: vextsh2d v2, v2
+; CHECK-P9-NEXT: xvcvsxddp vs5, v2
+; CHECK-P9-NEXT: vperm v2, v4, v4, v0
+; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: stxv vs5, 80(r3)
+; CHECK-P9-NEXT: vextsh2d v2, v2
+; CHECK-P9-NEXT: xvcvsxddp vs6, v2
+; CHECK-P9-NEXT: vperm v2, v4, v4, v1
+; CHECK-P9-NEXT: vextsh2d v2, v2
+; CHECK-P9-NEXT: stxv vs6, 96(r3)
+; CHECK-P9-NEXT: xvcvsxddp vs7, v2
+; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs7, 112(r3)
-; CHECK-P9-NEXT: stxv vs6, 96(r3)
-; CHECK-P9-NEXT: stxv vs5, 80(r3)
-; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI7_1@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI7_2@toc@ha
-; CHECK-BE-NEXT: addis r8, r2, .LCPI7_3@toc@ha
-; CHECK-BE-NEXT: lxv v2, 16(r4)
-; CHECK-BE-NEXT: lxv v3, 0(r4)
-; CHECK-BE-NEXT: xxlxor v6, v6, v6
; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI7_1@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI7_2@toc@l
-; CHECK-BE-NEXT: addi r8, r8, .LCPI7_3@toc@l
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: lxvx v0, 0, r7
-; CHECK-BE-NEXT: lxvx v1, 0, r8
-; CHECK-BE-NEXT: vperm v7, v6, v3, v4
-; CHECK-BE-NEXT: vperm v8, v6, v3, v5
-; CHECK-BE-NEXT: vperm v4, v6, v2, v4
-; CHECK-BE-NEXT: vperm v5, v6, v2, v5
-; CHECK-BE-NEXT: vperm v6, v3, v3, v0
-; CHECK-BE-NEXT: vperm v3, v3, v3, v1
-; CHECK-BE-NEXT: vperm v0, v2, v2, v0
-; CHECK-BE-NEXT: vperm v2, v2, v2, v1
-; CHECK-BE-NEXT: vextsh2d v1, v7
-; CHECK-BE-NEXT: vextsh2d v7, v8
-; CHECK-BE-NEXT: vextsh2d v4, v4
-; CHECK-BE-NEXT: vextsh2d v5, v5
-; CHECK-BE-NEXT: vextsh2d v6, v6
-; CHECK-BE-NEXT: vextsh2d v3, v3
+; CHECK-BE-NEXT: lxvx v2, 0, r5
+; CHECK-BE-NEXT: lxv v4, 0(r4)
+; CHECK-BE-NEXT: lxv v1, 16(r4)
+; CHECK-BE-NEXT: addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT: addi r5, r5, .LCPI7_1@toc@l
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT: xxlxor v5, v5, v5
+; CHECK-BE-NEXT: vperm v0, v5, v4, v2
+; CHECK-BE-NEXT: lxvx v3, 0, r5
+; CHECK-BE-NEXT: vperm v2, v5, v1, v2
+; CHECK-BE-NEXT: vextsh2d v2, v2
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l
; CHECK-BE-NEXT: vextsh2d v0, v0
+; CHECK-BE-NEXT: xvcvsxddp vs2, v2
+; CHECK-BE-NEXT: vperm v2, v5, v1, v3
; CHECK-BE-NEXT: vextsh2d v2, v2
-; CHECK-BE-NEXT: xvcvsxddp vs0, v1
-; CHECK-BE-NEXT: xvcvsxddp vs1, v7
-; CHECK-BE-NEXT: xvcvsxddp vs2, v4
-; CHECK-BE-NEXT: xvcvsxddp vs3, v5
-; CHECK-BE-NEXT: xvcvsxddp vs4, v6
-; CHECK-BE-NEXT: xvcvsxddp vs5, v3
-; CHECK-BE-NEXT: xvcvsxddp vs6, v0
-; CHECK-BE-NEXT: xvcvsxddp vs7, v2
-; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 80(r3)
+; CHECK-BE-NEXT: xvcvsxddp vs3, v2
+; CHECK-BE-NEXT: lxvx v2, 0, r4
+; CHECK-BE-NEXT: xvcvsxddp vs0, v0
+; CHECK-BE-NEXT: vperm v0, v5, v4, v3
+; CHECK-BE-NEXT: vperm v3, v4, v4, v2
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT: vextsh2d v0, v0
+; CHECK-BE-NEXT: xvcvsxddp vs1, v0
; CHECK-BE-NEXT: stxv vs1, 48(r3)
+; CHECK-BE-NEXT: vextsh2d v3, v3
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l
+; CHECK-BE-NEXT: xvcvsxddp vs4, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v1, v1, v2
+; CHECK-BE-NEXT: vextsh2d v2, v2
+; CHECK-BE-NEXT: xvcvsxddp vs6, v2
+; CHECK-BE-NEXT: vperm v2, v1, v1, v3
+; CHECK-BE-NEXT: vperm v4, v4, v4, v3
+; CHECK-BE-NEXT: vextsh2d v4, v4
+; CHECK-BE-NEXT: vextsh2d v2, v2
+; CHECK-BE-NEXT: xvcvsxddp vs7, v2
+; CHECK-BE-NEXT: xvcvsxddp vs5, v4
+; CHECK-BE-NEXT: stxv vs3, 112(r3)
+; CHECK-BE-NEXT: stxv vs6, 64(r3)
; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: stxv vs4, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 96(r3)
-; CHECK-BE-NEXT: stxv vs6, 64(r3)
; CHECK-BE-NEXT: stxv vs5, 32(r3)
-; CHECK-BE-NEXT: stxv vs4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i16>, <16 x i16>* %0, align 32
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT: lxv vs0, 16(r4)
+; CHECK-P9-NEXT: xvcvuxwdp vs2, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs1, vs1
+; CHECK-P9-NEXT: xvcvuxwdp vs1, v2
+; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT: xvcvuxwdp vs3, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: xvcvuxwdp vs0, v2
-; CHECK-P9-NEXT: xvcvuxwdp vs1, v3
-; CHECK-P9-NEXT: xvcvuxwdp vs2, v4
-; CHECK-P9-NEXT: xvcvuxwdp vs3, v5
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: stxv vs3, 32(r3)
+; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT: lxv vs0, 16(r4)
+; CHECK-BE-NEXT: xvcvuxwdp vs2, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs1, vs1
+; CHECK-BE-NEXT: xvcvuxwdp vs1, v2
+; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT: xvcvuxwdp vs3, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: xvcvuxwdp vs0, v2
-; CHECK-BE-NEXT: xvcvuxwdp vs1, v3
-; CHECK-BE-NEXT: xvcvuxwdp vs2, v4
-; CHECK-BE-NEXT: xvcvuxwdp vs3, v5
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i32>, <8 x i32>* %0, align 32
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: lxv vs3, 32(r4)
-; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
-; CHECK-P9-NEXT: xxmrglw v0, vs3, vs3
-; CHECK-P9-NEXT: xxmrghw v1, vs3, vs3
-; CHECK-P9-NEXT: xxmrglw v6, vs2, vs2
-; CHECK-P9-NEXT: xxmrghw v7, vs2, vs2
+; CHECK-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT: lxv vs2, 16(r4)
+; CHECK-P9-NEXT: lxv vs4, 48(r4)
+; CHECK-P9-NEXT: xvcvuxwdp vs1, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT: lxv vs5, 32(r4)
; CHECK-P9-NEXT: xvcvuxwdp vs0, v2
-; CHECK-P9-NEXT: xvcvuxwdp vs1, v3
-; CHECK-P9-NEXT: xvcvuxwdp vs2, v4
-; CHECK-P9-NEXT: xvcvuxwdp vs3, v5
-; CHECK-P9-NEXT: xvcvuxwdp vs4, v0
-; CHECK-P9-NEXT: xvcvuxwdp vs5, v1
-; CHECK-P9-NEXT: xvcvuxwdp vs6, v6
-; CHECK-P9-NEXT: xvcvuxwdp vs7, v7
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs7, 112(r3)
-; CHECK-P9-NEXT: stxv vs6, 96(r3)
+; CHECK-P9-NEXT: xxmrglw v2, vs2, vs2
+; CHECK-P9-NEXT: xvcvuxwdp vs3, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs2, vs2
+; CHECK-P9-NEXT: stxv vs1, 0(r3)
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: xvcvuxwdp vs2, v2
+; CHECK-P9-NEXT: xxmrglw v2, vs5, vs5
+; CHECK-P9-NEXT: xvcvuxwdp vs6, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs5, vs5
+; CHECK-P9-NEXT: stxv vs3, 32(r3)
+; CHECK-P9-NEXT: stxv vs2, 48(r3)
+; CHECK-P9-NEXT: xvcvuxwdp vs5, v2
+; CHECK-P9-NEXT: xxmrglw v2, vs4, vs4
+; CHECK-P9-NEXT: xvcvuxwdp vs7, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs4, vs4
+; CHECK-P9-NEXT: stxv vs6, 64(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
-; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: xvcvuxwdp vs4, v2
+; CHECK-P9-NEXT: stxv vs7, 96(r3)
+; CHECK-P9-NEXT: stxv vs4, 112(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
-; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: lxv vs2, 48(r4)
-; CHECK-BE-NEXT: lxv vs3, 32(r4)
-; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
-; CHECK-BE-NEXT: xxmrghw v0, vs3, vs3
-; CHECK-BE-NEXT: xxmrglw v1, vs3, vs3
-; CHECK-BE-NEXT: xxmrghw v6, vs2, vs2
-; CHECK-BE-NEXT: xxmrglw v7, vs2, vs2
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT: lxv vs2, 16(r4)
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: xvcvuxwdp vs1, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
; CHECK-BE-NEXT: xvcvuxwdp vs0, v2
-; CHECK-BE-NEXT: xvcvuxwdp vs1, v3
-; CHECK-BE-NEXT: xvcvuxwdp vs2, v4
-; CHECK-BE-NEXT: xvcvuxwdp vs3, v5
-; CHECK-BE-NEXT: xvcvuxwdp vs4, v0
-; CHECK-BE-NEXT: xvcvuxwdp vs5, v1
-; CHECK-BE-NEXT: xvcvuxwdp vs6, v6
-; CHECK-BE-NEXT: xvcvuxwdp vs7, v7
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
-; CHECK-BE-NEXT: stxv vs7, 112(r3)
-; CHECK-BE-NEXT: stxv vs6, 96(r3)
+; CHECK-BE-NEXT: xxmrghw v2, vs2, vs2
+; CHECK-BE-NEXT: xvcvuxwdp vs3, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs2, vs2
+; CHECK-BE-NEXT: stxv vs1, 0(r3)
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: xvcvuxwdp vs2, v2
+; CHECK-BE-NEXT: xxmrghw v2, vs5, vs5
+; CHECK-BE-NEXT: xvcvuxwdp vs6, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs5, vs5
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs2, 48(r3)
+; CHECK-BE-NEXT: xvcvuxwdp vs5, v2
+; CHECK-BE-NEXT: xxmrghw v2, vs4, vs4
+; CHECK-BE-NEXT: xvcvuxwdp vs7, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs4, vs4
+; CHECK-BE-NEXT: stxv vs6, 64(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
-; CHECK-BE-NEXT: stxv vs4, 64(r3)
+; CHECK-BE-NEXT: xvcvuxwdp vs4, v2
+; CHECK-BE-NEXT: stxv vs7, 96(r3)
+; CHECK-BE-NEXT: stxv vs4, 112(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i32>, <16 x i32>* %0, align 64
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT: lxv vs0, 16(r4)
+; CHECK-P9-NEXT: xvcvsxwdp vs2, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs1, vs1
+; CHECK-P9-NEXT: xvcvsxwdp vs1, v2
+; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT: xvcvsxwdp vs3, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: xvcvsxwdp vs0, v2
-; CHECK-P9-NEXT: xvcvsxwdp vs1, v3
-; CHECK-P9-NEXT: xvcvsxwdp vs2, v4
-; CHECK-P9-NEXT: xvcvsxwdp vs3, v5
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: stxv vs3, 32(r3)
+; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT: lxv vs0, 16(r4)
+; CHECK-BE-NEXT: xvcvsxwdp vs2, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs1, vs1
+; CHECK-BE-NEXT: xvcvsxwdp vs1, v2
+; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT: xvcvsxwdp vs3, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: xvcvsxwdp vs0, v2
-; CHECK-BE-NEXT: xvcvsxwdp vs1, v3
-; CHECK-BE-NEXT: xvcvsxwdp vs2, v4
-; CHECK-BE-NEXT: xvcvsxwdp vs3, v5
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i32>, <8 x i32>* %0, align 32
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs0, 16(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-P9-NEXT: lxv vs2, 48(r4)
-; CHECK-P9-NEXT: lxv vs3, 32(r4)
-; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
-; CHECK-P9-NEXT: xxmrglw v0, vs3, vs3
-; CHECK-P9-NEXT: xxmrghw v1, vs3, vs3
-; CHECK-P9-NEXT: xxmrglw v6, vs2, vs2
-; CHECK-P9-NEXT: xxmrghw v7, vs2, vs2
+; CHECK-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-P9-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT: lxv vs2, 16(r4)
+; CHECK-P9-NEXT: lxv vs4, 48(r4)
+; CHECK-P9-NEXT: xvcvsxwdp vs1, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT: lxv vs5, 32(r4)
; CHECK-P9-NEXT: xvcvsxwdp vs0, v2
-; CHECK-P9-NEXT: xvcvsxwdp vs1, v3
-; CHECK-P9-NEXT: xvcvsxwdp vs2, v4
-; CHECK-P9-NEXT: xvcvsxwdp vs3, v5
-; CHECK-P9-NEXT: xvcvsxwdp vs4, v0
-; CHECK-P9-NEXT: xvcvsxwdp vs5, v1
-; CHECK-P9-NEXT: xvcvsxwdp vs6, v6
-; CHECK-P9-NEXT: xvcvsxwdp vs7, v7
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs7, 112(r3)
-; CHECK-P9-NEXT: stxv vs6, 96(r3)
+; CHECK-P9-NEXT: xxmrglw v2, vs2, vs2
+; CHECK-P9-NEXT: xvcvsxwdp vs3, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs2, vs2
+; CHECK-P9-NEXT: stxv vs1, 0(r3)
+; CHECK-P9-NEXT: stxv vs0, 16(r3)
+; CHECK-P9-NEXT: xvcvsxwdp vs2, v2
+; CHECK-P9-NEXT: xxmrglw v2, vs5, vs5
+; CHECK-P9-NEXT: xvcvsxwdp vs6, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs5, vs5
+; CHECK-P9-NEXT: stxv vs3, 32(r3)
+; CHECK-P9-NEXT: stxv vs2, 48(r3)
+; CHECK-P9-NEXT: xvcvsxwdp vs5, v2
+; CHECK-P9-NEXT: xxmrglw v2, vs4, vs4
+; CHECK-P9-NEXT: xvcvsxwdp vs7, v2
+; CHECK-P9-NEXT: xxmrghw v2, vs4, vs4
+; CHECK-P9-NEXT: stxv vs6, 64(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
-; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: xvcvsxwdp vs4, v2
+; CHECK-P9-NEXT: stxv vs7, 96(r3)
+; CHECK-P9-NEXT: stxv vs4, 112(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv vs0, 16(r4)
-; CHECK-BE-NEXT: lxv vs1, 0(r4)
-; CHECK-BE-NEXT: lxv vs2, 48(r4)
-; CHECK-BE-NEXT: lxv vs3, 32(r4)
-; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
-; CHECK-BE-NEXT: xxmrghw v0, vs3, vs3
-; CHECK-BE-NEXT: xxmrglw v1, vs3, vs3
-; CHECK-BE-NEXT: xxmrghw v6, vs2, vs2
-; CHECK-BE-NEXT: xxmrglw v7, vs2, vs2
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT: lxv vs2, 16(r4)
+; CHECK-BE-NEXT: lxv vs4, 48(r4)
+; CHECK-BE-NEXT: xvcvsxwdp vs1, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT: lxv vs5, 32(r4)
; CHECK-BE-NEXT: xvcvsxwdp vs0, v2
-; CHECK-BE-NEXT: xvcvsxwdp vs1, v3
-; CHECK-BE-NEXT: xvcvsxwdp vs2, v4
-; CHECK-BE-NEXT: xvcvsxwdp vs3, v5
-; CHECK-BE-NEXT: xvcvsxwdp vs4, v0
-; CHECK-BE-NEXT: xvcvsxwdp vs5, v1
-; CHECK-BE-NEXT: xvcvsxwdp vs6, v6
-; CHECK-BE-NEXT: xvcvsxwdp vs7, v7
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
-; CHECK-BE-NEXT: stxv vs7, 112(r3)
-; CHECK-BE-NEXT: stxv vs6, 96(r3)
+; CHECK-BE-NEXT: xxmrghw v2, vs2, vs2
+; CHECK-BE-NEXT: xvcvsxwdp vs3, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs2, vs2
+; CHECK-BE-NEXT: stxv vs1, 0(r3)
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: xvcvsxwdp vs2, v2
+; CHECK-BE-NEXT: xxmrghw v2, vs5, vs5
+; CHECK-BE-NEXT: xvcvsxwdp vs6, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs5, vs5
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: stxv vs2, 48(r3)
+; CHECK-BE-NEXT: xvcvsxwdp vs5, v2
+; CHECK-BE-NEXT: xxmrghw v2, vs4, vs4
+; CHECK-BE-NEXT: xvcvsxwdp vs7, v2
+; CHECK-BE-NEXT: xxmrglw v2, vs4, vs4
+; CHECK-BE-NEXT: stxv vs6, 64(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
-; CHECK-BE-NEXT: stxv vs4, 64(r3)
+; CHECK-BE-NEXT: xvcvsxwdp vs4, v2
+; CHECK-BE-NEXT: stxv vs7, 96(r3)
+; CHECK-BE-NEXT: stxv vs4, 112(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i32>, <16 x i32>* %0, align 64
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xxlor vs1, v2, v2
-; CHECK-P9-NEXT: xscvuxdsp f1, f1
; CHECK-P9-NEXT: xscvuxdsp f0, f0
-; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT: xxlor vs0, v2, v2
+; CHECK-P9-NEXT: xscvuxdsp f0, f0
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 16(r3)
; CHECK-P9-NEXT: lxv v3, 0(r3)
; CHECK-P9-NEXT: xvcvuxdsp vs0, v3
-; CHECK-P9-NEXT: xvcvuxdsp vs1, v2
+; CHECK-P9-NEXT: lxv v2, 16(r3)
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v2
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT: vpkudum v2, v3, v2
+; CHECK-P9-NEXT: vpkudum v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: xvcvuxdsp vs0, v3
-; CHECK-BE-NEXT: xvcvuxdsp vs1, v2
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v2
; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT: vpkudum v2, v3, v2
+; CHECK-BE-NEXT: vpkudum v2, v2, v3
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x i64>, <4 x i64>* %0, align 32
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 48(r4)
-; CHECK-P9-NEXT: lxv v3, 32(r4)
-; CHECK-P9-NEXT: lxv v4, 16(r4)
; CHECK-P9-NEXT: lxv v5, 0(r4)
; CHECK-P9-NEXT: xvcvuxdsp vs0, v5
-; CHECK-P9-NEXT: xvcvuxdsp vs1, v4
-; CHECK-P9-NEXT: xvcvuxdsp vs2, v3
-; CHECK-P9-NEXT: xvcvuxdsp vs3, v2
+; CHECK-P9-NEXT: lxv v4, 16(r4)
+; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v4
+; CHECK-P9-NEXT: lxv v3, 32(r4)
+; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v3
+; CHECK-P9-NEXT: lxv v2, 48(r4)
+; CHECK-P9-NEXT: vpkudum v3, v4, v5
+; CHECK-P9-NEXT: stxv v3, 0(r3)
+; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v2
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT: vpkudum v2, v3, v2
-; CHECK-P9-NEXT: vpkudum v3, v5, v4
-; CHECK-P9-NEXT: stxv v3, 16(r3)
-; CHECK-P9-NEXT: stxv v2, 0(r3)
+; CHECK-P9-NEXT: vpkudum v2, v2, v4
+; CHECK-P9-NEXT: stxv v2, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 32(r4)
-; CHECK-BE-NEXT: lxv v3, 48(r4)
-; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: lxv v5, 16(r4)
; CHECK-BE-NEXT: xvcvuxdsp vs0, v5
-; CHECK-BE-NEXT: xvcvuxdsp vs1, v4
-; CHECK-BE-NEXT: xvcvuxdsp vs2, v3
-; CHECK-BE-NEXT: xvcvuxdsp vs3, v2
+; CHECK-BE-NEXT: lxv v4, 0(r4)
+; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v4
+; CHECK-BE-NEXT: lxv v3, 48(r4)
+; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v3
+; CHECK-BE-NEXT: lxv v2, 32(r4)
+; CHECK-BE-NEXT: vpkudum v3, v4, v5
+; CHECK-BE-NEXT: stxv v3, 0(r3)
+; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v2
; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT: vpkudum v2, v3, v2
-; CHECK-BE-NEXT: vpkudum v3, v5, v4
-; CHECK-BE-NEXT: stxv v3, 16(r3)
-; CHECK-BE-NEXT: stxv v2, 0(r3)
+; CHECK-BE-NEXT: vpkudum v2, v2, v4
+; CHECK-BE-NEXT: stxv v2, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i64>, <8 x i64>* %0, align 64
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 48(r4)
-; CHECK-P9-NEXT: lxv v3, 32(r4)
-; CHECK-P9-NEXT: lxv v4, 16(r4)
-; CHECK-P9-NEXT: lxv v5, 0(r4)
-; CHECK-P9-NEXT: lxv v0, 112(r4)
-; CHECK-P9-NEXT: lxv v1, 96(r4)
-; CHECK-P9-NEXT: lxv v6, 80(r4)
-; CHECK-P9-NEXT: lxv v7, 64(r4)
+; CHECK-P9-NEXT: lxv v7, 0(r4)
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v7
+; CHECK-P9-NEXT: lxv v6, 16(r4)
+; CHECK-P9-NEXT: xxsldwi v7, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v6
+; CHECK-P9-NEXT: lxv v1, 32(r4)
+; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v1
+; CHECK-P9-NEXT: lxv v0, 48(r4)
+; CHECK-P9-NEXT: vpkudum v1, v6, v7
+; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v0
+; CHECK-P9-NEXT: lxv v5, 64(r4)
+; CHECK-P9-NEXT: xxsldwi v0, vs0, vs0, 3
; CHECK-P9-NEXT: xvcvuxdsp vs0, v5
-; CHECK-P9-NEXT: xvcvuxdsp vs1, v4
-; CHECK-P9-NEXT: xvcvuxdsp vs2, v3
-; CHECK-P9-NEXT: xvcvuxdsp vs3, v2
-; CHECK-P9-NEXT: xvcvuxdsp vs4, v7
-; CHECK-P9-NEXT: xvcvuxdsp vs5, v6
-; CHECK-P9-NEXT: xvcvuxdsp vs6, v1
-; CHECK-P9-NEXT: xvcvuxdsp vs7, v0
-; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT: xxsldwi v0, vs4, vs4, 3
-; CHECK-P9-NEXT: xxsldwi v1, vs5, vs5, 3
-; CHECK-P9-NEXT: xxsldwi v6, vs6, vs6, 3
-; CHECK-P9-NEXT: xxsldwi v7, vs7, vs7, 3
-; CHECK-P9-NEXT: vpkudum v2, v3, v2
-; CHECK-P9-NEXT: vpkudum v3, v5, v4
-; CHECK-P9-NEXT: vpkudum v4, v1, v0
-; CHECK-P9-NEXT: vpkudum v5, v7, v6
-; CHECK-P9-NEXT: stxv v3, 16(r3)
-; CHECK-P9-NEXT: stxv v2, 0(r3)
-; CHECK-P9-NEXT: stxv v5, 48(r3)
+; CHECK-P9-NEXT: lxv v4, 80(r4)
+; CHECK-P9-NEXT: vpkudum v0, v0, v6
+; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT: lxv v3, 96(r4)
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v4
+; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v3
+; CHECK-P9-NEXT: lxv v2, 112(r4)
+; CHECK-P9-NEXT: stxv v0, 16(r3)
+; CHECK-P9-NEXT: stxv v1, 0(r3)
+; CHECK-P9-NEXT: vpkudum v4, v4, v5
; CHECK-P9-NEXT: stxv v4, 32(r3)
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvuxdsp vs0, v2
+; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT: vpkudum v2, v2, v3
+; CHECK-P9-NEXT: stxv v2, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 32(r4)
-; CHECK-BE-NEXT: lxv v3, 48(r4)
-; CHECK-BE-NEXT: lxv v4, 0(r4)
-; CHECK-BE-NEXT: lxv v5, 16(r4)
-; CHECK-BE-NEXT: lxv v0, 96(r4)
-; CHECK-BE-NEXT: lxv v1, 112(r4)
-; CHECK-BE-NEXT: lxv v6, 64(r4)
-; CHECK-BE-NEXT: lxv v7, 80(r4)
+; CHECK-BE-NEXT: lxv v7, 16(r4)
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v7
+; CHECK-BE-NEXT: lxv v6, 0(r4)
+; CHECK-BE-NEXT: xxsldwi v7, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v6
+; CHECK-BE-NEXT: lxv v1, 48(r4)
+; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v1
+; CHECK-BE-NEXT: lxv v0, 32(r4)
+; CHECK-BE-NEXT: vpkudum v1, v6, v7
+; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v0
+; CHECK-BE-NEXT: lxv v5, 80(r4)
+; CHECK-BE-NEXT: xxsldwi v0, vs0, vs0, 3
; CHECK-BE-NEXT: xvcvuxdsp vs0, v5
-; CHECK-BE-NEXT: xvcvuxdsp vs1, v4
-; CHECK-BE-NEXT: xvcvuxdsp vs2, v3
-; CHECK-BE-NEXT: xvcvuxdsp vs3, v2
-; CHECK-BE-NEXT: xvcvuxdsp vs4, v7
-; CHECK-BE-NEXT: xvcvuxdsp vs5, v6
-; CHECK-BE-NEXT: xvcvuxdsp vs6, v1
-; CHECK-BE-NEXT: xvcvuxdsp vs7, v0
-; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT: xxsldwi v0, vs4, vs4, 3
-; CHECK-BE-NEXT: xxsldwi v1, vs5, vs5, 3
-; CHECK-BE-NEXT: xxsldwi v6, vs6, vs6, 3
-; CHECK-BE-NEXT: xxsldwi v7, vs7, vs7, 3
-; CHECK-BE-NEXT: vpkudum v2, v3, v2
-; CHECK-BE-NEXT: vpkudum v3, v5, v4
-; CHECK-BE-NEXT: vpkudum v4, v1, v0
-; CHECK-BE-NEXT: vpkudum v5, v7, v6
-; CHECK-BE-NEXT: stxv v3, 16(r3)
-; CHECK-BE-NEXT: stxv v2, 0(r3)
-; CHECK-BE-NEXT: stxv v5, 48(r3)
+; CHECK-BE-NEXT: lxv v4, 64(r4)
+; CHECK-BE-NEXT: vpkudum v0, v0, v6
+; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT: lxv v3, 112(r4)
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v4
+; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v3
+; CHECK-BE-NEXT: lxv v2, 96(r4)
+; CHECK-BE-NEXT: stxv v0, 16(r3)
+; CHECK-BE-NEXT: stxv v1, 0(r3)
+; CHECK-BE-NEXT: vpkudum v4, v4, v5
; CHECK-BE-NEXT: stxv v4, 32(r3)
+; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvuxdsp vs0, v2
+; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT: vpkudum v2, v2, v3
+; CHECK-BE-NEXT: stxv v2, 48(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: xxlor vs1, v2, v2
-; CHECK-P9-NEXT: xscvsxdsp f1, f1
; CHECK-P9-NEXT: xscvsxdsp f0, f0
-; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT: xxlor vs0, v2, v2
+; CHECK-P9-NEXT: xscvsxdsp f0, f0
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 16(r3)
; CHECK-P9-NEXT: lxv v3, 0(r3)
; CHECK-P9-NEXT: xvcvsxdsp vs0, v3
-; CHECK-P9-NEXT: xvcvsxdsp vs1, v2
+; CHECK-P9-NEXT: lxv v2, 16(r3)
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v2
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT: vpkudum v2, v3, v2
+; CHECK-P9-NEXT: vpkudum v2, v2, v3
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: xvcvsxdsp vs0, v3
-; CHECK-BE-NEXT: xvcvsxdsp vs1, v2
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v2
; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT: vpkudum v2, v3, v2
+; CHECK-BE-NEXT: vpkudum v2, v2, v3
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x i64>, <4 x i64>* %0, align 32
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 48(r4)
-; CHECK-P9-NEXT: lxv v3, 32(r4)
-; CHECK-P9-NEXT: lxv v4, 16(r4)
; CHECK-P9-NEXT: lxv v5, 0(r4)
; CHECK-P9-NEXT: xvcvsxdsp vs0, v5
-; CHECK-P9-NEXT: xvcvsxdsp vs1, v4
-; CHECK-P9-NEXT: xvcvsxdsp vs2, v3
-; CHECK-P9-NEXT: xvcvsxdsp vs3, v2
+; CHECK-P9-NEXT: lxv v4, 16(r4)
+; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v4
+; CHECK-P9-NEXT: lxv v3, 32(r4)
+; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v3
+; CHECK-P9-NEXT: lxv v2, 48(r4)
+; CHECK-P9-NEXT: vpkudum v3, v4, v5
+; CHECK-P9-NEXT: stxv v3, 0(r3)
+; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v2
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT: vpkudum v2, v3, v2
-; CHECK-P9-NEXT: vpkudum v3, v5, v4
-; CHECK-P9-NEXT: stxv v3, 16(r3)
-; CHECK-P9-NEXT: stxv v2, 0(r3)
+; CHECK-P9-NEXT: vpkudum v2, v2, v4
+; CHECK-P9-NEXT: stxv v2, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 32(r4)
-; CHECK-BE-NEXT: lxv v3, 48(r4)
-; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: lxv v5, 16(r4)
; CHECK-BE-NEXT: xvcvsxdsp vs0, v5
-; CHECK-BE-NEXT: xvcvsxdsp vs1, v4
-; CHECK-BE-NEXT: xvcvsxdsp vs2, v3
-; CHECK-BE-NEXT: xvcvsxdsp vs3, v2
+; CHECK-BE-NEXT: lxv v4, 0(r4)
+; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v4
+; CHECK-BE-NEXT: lxv v3, 48(r4)
+; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v3
+; CHECK-BE-NEXT: lxv v2, 32(r4)
+; CHECK-BE-NEXT: vpkudum v3, v4, v5
+; CHECK-BE-NEXT: stxv v3, 0(r3)
+; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v2
; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT: vpkudum v2, v3, v2
-; CHECK-BE-NEXT: vpkudum v3, v5, v4
-; CHECK-BE-NEXT: stxv v3, 16(r3)
-; CHECK-BE-NEXT: stxv v2, 0(r3)
+; CHECK-BE-NEXT: vpkudum v2, v2, v4
+; CHECK-BE-NEXT: stxv v2, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i64>, <8 x i64>* %0, align 64
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 48(r4)
-; CHECK-P9-NEXT: lxv v3, 32(r4)
-; CHECK-P9-NEXT: lxv v4, 16(r4)
-; CHECK-P9-NEXT: lxv v5, 0(r4)
-; CHECK-P9-NEXT: lxv v0, 112(r4)
-; CHECK-P9-NEXT: lxv v1, 96(r4)
-; CHECK-P9-NEXT: lxv v6, 80(r4)
-; CHECK-P9-NEXT: lxv v7, 64(r4)
+; CHECK-P9-NEXT: lxv v7, 0(r4)
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v7
+; CHECK-P9-NEXT: lxv v6, 16(r4)
+; CHECK-P9-NEXT: xxsldwi v7, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v6
+; CHECK-P9-NEXT: lxv v1, 32(r4)
+; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v1
+; CHECK-P9-NEXT: lxv v0, 48(r4)
+; CHECK-P9-NEXT: vpkudum v1, v6, v7
+; CHECK-P9-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v0
+; CHECK-P9-NEXT: lxv v5, 64(r4)
+; CHECK-P9-NEXT: xxsldwi v0, vs0, vs0, 3
; CHECK-P9-NEXT: xvcvsxdsp vs0, v5
-; CHECK-P9-NEXT: xvcvsxdsp vs1, v4
-; CHECK-P9-NEXT: xvcvsxdsp vs2, v3
-; CHECK-P9-NEXT: xvcvsxdsp vs3, v2
-; CHECK-P9-NEXT: xvcvsxdsp vs4, v7
-; CHECK-P9-NEXT: xvcvsxdsp vs5, v6
-; CHECK-P9-NEXT: xvcvsxdsp vs6, v1
-; CHECK-P9-NEXT: xvcvsxdsp vs7, v0
-; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT: xxsldwi v0, vs4, vs4, 3
-; CHECK-P9-NEXT: xxsldwi v1, vs5, vs5, 3
-; CHECK-P9-NEXT: xxsldwi v6, vs6, vs6, 3
-; CHECK-P9-NEXT: xxsldwi v7, vs7, vs7, 3
-; CHECK-P9-NEXT: vpkudum v2, v3, v2
-; CHECK-P9-NEXT: vpkudum v3, v5, v4
-; CHECK-P9-NEXT: vpkudum v4, v1, v0
-; CHECK-P9-NEXT: vpkudum v5, v7, v6
-; CHECK-P9-NEXT: stxv v3, 16(r3)
-; CHECK-P9-NEXT: stxv v2, 0(r3)
-; CHECK-P9-NEXT: stxv v5, 48(r3)
+; CHECK-P9-NEXT: lxv v4, 80(r4)
+; CHECK-P9-NEXT: vpkudum v0, v0, v6
+; CHECK-P9-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT: lxv v3, 96(r4)
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v4
+; CHECK-P9-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v3
+; CHECK-P9-NEXT: lxv v2, 112(r4)
+; CHECK-P9-NEXT: stxv v0, 16(r3)
+; CHECK-P9-NEXT: stxv v1, 0(r3)
+; CHECK-P9-NEXT: vpkudum v4, v4, v5
; CHECK-P9-NEXT: stxv v4, 32(r3)
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT: xvcvsxdsp vs0, v2
+; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT: vpkudum v2, v2, v3
+; CHECK-P9-NEXT: stxv v2, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 32(r4)
-; CHECK-BE-NEXT: lxv v3, 48(r4)
-; CHECK-BE-NEXT: lxv v4, 0(r4)
-; CHECK-BE-NEXT: lxv v5, 16(r4)
-; CHECK-BE-NEXT: lxv v0, 96(r4)
-; CHECK-BE-NEXT: lxv v1, 112(r4)
-; CHECK-BE-NEXT: lxv v6, 64(r4)
-; CHECK-BE-NEXT: lxv v7, 80(r4)
+; CHECK-BE-NEXT: lxv v7, 16(r4)
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v7
+; CHECK-BE-NEXT: lxv v6, 0(r4)
+; CHECK-BE-NEXT: xxsldwi v7, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v6
+; CHECK-BE-NEXT: lxv v1, 48(r4)
+; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v1
+; CHECK-BE-NEXT: lxv v0, 32(r4)
+; CHECK-BE-NEXT: vpkudum v1, v6, v7
+; CHECK-BE-NEXT: xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v0
+; CHECK-BE-NEXT: lxv v5, 80(r4)
+; CHECK-BE-NEXT: xxsldwi v0, vs0, vs0, 3
; CHECK-BE-NEXT: xvcvsxdsp vs0, v5
-; CHECK-BE-NEXT: xvcvsxdsp vs1, v4
-; CHECK-BE-NEXT: xvcvsxdsp vs2, v3
-; CHECK-BE-NEXT: xvcvsxdsp vs3, v2
-; CHECK-BE-NEXT: xvcvsxdsp vs4, v7
-; CHECK-BE-NEXT: xvcvsxdsp vs5, v6
-; CHECK-BE-NEXT: xvcvsxdsp vs6, v1
-; CHECK-BE-NEXT: xvcvsxdsp vs7, v0
-; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT: xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT: xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT: xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT: xxsldwi v0, vs4, vs4, 3
-; CHECK-BE-NEXT: xxsldwi v1, vs5, vs5, 3
-; CHECK-BE-NEXT: xxsldwi v6, vs6, vs6, 3
-; CHECK-BE-NEXT: xxsldwi v7, vs7, vs7, 3
-; CHECK-BE-NEXT: vpkudum v2, v3, v2
-; CHECK-BE-NEXT: vpkudum v3, v5, v4
-; CHECK-BE-NEXT: vpkudum v4, v1, v0
-; CHECK-BE-NEXT: vpkudum v5, v7, v6
-; CHECK-BE-NEXT: stxv v3, 16(r3)
-; CHECK-BE-NEXT: stxv v2, 0(r3)
-; CHECK-BE-NEXT: stxv v5, 48(r3)
+; CHECK-BE-NEXT: lxv v4, 64(r4)
+; CHECK-BE-NEXT: vpkudum v0, v0, v6
+; CHECK-BE-NEXT: xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT: lxv v3, 112(r4)
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v4
+; CHECK-BE-NEXT: xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v3
+; CHECK-BE-NEXT: lxv v2, 96(r4)
+; CHECK-BE-NEXT: stxv v0, 16(r3)
+; CHECK-BE-NEXT: stxv v1, 0(r3)
+; CHECK-BE-NEXT: vpkudum v4, v4, v5
; CHECK-BE-NEXT: stxv v4, 32(r3)
+; CHECK-BE-NEXT: xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT: xvcvsxdsp vs0, v2
+; CHECK-BE-NEXT: xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT: vpkudum v2, v2, v3
+; CHECK-BE-NEXT: stxv v2, 48(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: vextubrx r3, r3, v2
-; CHECK-P9-NEXT: vextubrx r4, r4, v2
; CHECK-P9-NEXT: rlwinm r3, r3, 0, 24, 31
-; CHECK-P9-NEXT: rlwinm r4, r4, 0, 24, 31
; CHECK-P9-NEXT: mtvsrwz f0, r3
-; CHECK-P9-NEXT: mtvsrwz f1, r4
+; CHECK-P9-NEXT: li r3, 1
+; CHECK-P9-NEXT: xscvuxdsp f0, f0
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: vextubrx r3, r3, v2
+; CHECK-P9-NEXT: rlwinm r3, r3, 0, 24, 31
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT: mtvsrwz f0, r3
; CHECK-P9-NEXT: xscvuxdsp f0, f0
-; CHECK-P9-NEXT: xscvuxdsp f1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: vextublx r3, r3, v2
-; CHECK-BE-NEXT: vextublx r4, r4, v2
; CHECK-BE-NEXT: rlwinm r3, r3, 0, 24, 31
-; CHECK-BE-NEXT: rlwinm r4, r4, 0, 24, 31
; CHECK-BE-NEXT: mtvsrwz f0, r3
-; CHECK-BE-NEXT: mtvsrwz f1, r4
+; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: xscvuxdsp f0, f0
+; CHECK-BE-NEXT: vextublx r3, r3, v2
+; CHECK-BE-NEXT: rlwinm r3, r3, 0, 24, 31
+; CHECK-BE-NEXT: xscvdpspn v3, f0
+; CHECK-BE-NEXT: mtvsrwz f0, r3
; CHECK-BE-NEXT: xscvuxdsp f0, f0
-; CHECK-BE-NEXT: xscvuxdsp f1, f1
; CHECK-BE-NEXT: xscvdpspn v2, f0
-; CHECK-BE-NEXT: xscvdpspn v3, f1
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha
-; CHECK-P9-NEXT: mtvsrws v3, r3
+; CHECK-P9-NEXT: mtvsrws v2, r3
+; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: xxlxor v4, v4, v4
-; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r4
-; CHECK-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxwsp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT: mtvsrws v3, r3
+; CHECK-BE-NEXT: mtvsrws v2, r3
+; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r3
; CHECK-BE-NEXT: xxlxor v4, v4, v4
-; CHECK-BE-NEXT: addi r4, r4, .LCPI1_0@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r4
-; CHECK-BE-NEXT: vperm v2, v3, v4, v2
+; CHECK-BE-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxwsp v2, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI2_1@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
-; CHECK-P9-NEXT: xxlxor v5, v5, v5
-; CHECK-P9-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI2_1@toc@l
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: vperm v2, v5, v4, v2
-; CHECK-P9-NEXT: vperm v3, v5, v4, v3
-; CHECK-P9-NEXT: xvcvuxwsp vs0, v2
-; CHECK-P9-NEXT: xvcvuxwsp vs1, v3
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: xvcvuxwsp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxwsp vs1, v2
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI2_1@toc@ha
-; CHECK-BE-NEXT: mtvsrd v4, r4
-; CHECK-BE-NEXT: xxlxor v5, v5, v5
-; CHECK-BE-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI2_1@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: vperm v2, v4, v5, v2
-; CHECK-BE-NEXT: vperm v3, v5, v4, v3
-; CHECK-BE-NEXT: xvcvuxwsp vs0, v2
-; CHECK-BE-NEXT: xvcvuxwsp vs1, v3
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
+; CHECK-BE-NEXT: xvcvuxwsp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxwsp vs1, v2
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <8 x i8>
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-P9-NEXT: addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI3_3@toc@ha
-; CHECK-P9-NEXT: xxlxor v1, v1, v1
; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l
-; CHECK-P9-NEXT: addi r5, r5, .LCPI3_1@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI3_2@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI3_3@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: lxvx v5, 0, r6
-; CHECK-P9-NEXT: lxvx v0, 0, r7
-; CHECK-P9-NEXT: vperm v3, v1, v2, v3
-; CHECK-P9-NEXT: vperm v4, v1, v2, v4
-; CHECK-P9-NEXT: vperm v5, v1, v2, v5
-; CHECK-P9-NEXT: vperm v2, v1, v2, v0
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxwsp vs0, v3
-; CHECK-P9-NEXT: xvcvuxwsp vs1, v4
-; CHECK-P9-NEXT: xvcvuxwsp vs2, v5
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxwsp vs1, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: xvcvuxwsp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: xvcvuxwsp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT: addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI3_3@toc@ha
-; CHECK-BE-NEXT: xxlxor v1, v1, v1
; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l
-; CHECK-BE-NEXT: addi r5, r5, .LCPI3_1@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI3_2@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI3_3@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: lxvx v0, 0, r7
-; CHECK-BE-NEXT: vperm v3, v2, v1, v3
-; CHECK-BE-NEXT: vperm v4, v1, v2, v4
-; CHECK-BE-NEXT: vperm v5, v1, v2, v5
-; CHECK-BE-NEXT: vperm v2, v1, v2, v0
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxwsp vs0, v3
-; CHECK-BE-NEXT: xvcvuxwsp vs1, v4
-; CHECK-BE-NEXT: xvcvuxwsp vs2, v5
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_2@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxwsp vs1, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_3@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: xvcvuxwsp vs2, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: xvcvuxwsp vs3, v2
; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <16 x i8> %a to <16 x float>
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: li r3, 0
-; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: vextubrx r3, r3, v2
-; CHECK-P9-NEXT: vextubrx r4, r4, v2
; CHECK-P9-NEXT: extsb r3, r3
-; CHECK-P9-NEXT: extsb r4, r4
; CHECK-P9-NEXT: mtvsrwa f0, r3
-; CHECK-P9-NEXT: mtvsrwa f1, r4
+; CHECK-P9-NEXT: li r3, 1
+; CHECK-P9-NEXT: xscvsxdsp f0, f0
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: vextubrx r3, r3, v2
+; CHECK-P9-NEXT: extsb r3, r3
+; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT: mtvsrwa f0, r3
; CHECK-P9-NEXT: xscvsxdsp f0, f0
-; CHECK-P9-NEXT: xscvsxdsp f1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT: vmrglw v2, v3, v2
+; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrws v2, r3
; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: vextublx r3, r3, v2
-; CHECK-BE-NEXT: vextublx r4, r4, v2
; CHECK-BE-NEXT: extsb r3, r3
-; CHECK-BE-NEXT: extsb r4, r4
; CHECK-BE-NEXT: mtvsrwa f0, r3
-; CHECK-BE-NEXT: mtvsrwa f1, r4
+; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: xscvsxdsp f0, f0
+; CHECK-BE-NEXT: vextublx r3, r3, v2
+; CHECK-BE-NEXT: extsb r3, r3
+; CHECK-BE-NEXT: xscvdpspn v3, f0
+; CHECK-BE-NEXT: mtvsrwa f0, r3
; CHECK-BE-NEXT: xscvsxdsp f0, f0
-; CHECK-BE-NEXT: xscvsxdsp f1, f1
; CHECK-BE-NEXT: xscvdpspn v2, f0
-; CHECK-BE-NEXT: xscvdpspn v3, f1
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
+; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r4, r2, .LCPI5_0@toc@ha
-; CHECK-P9-NEXT: mtvsrws v3, r3
-; CHECK-P9-NEXT: addi r4, r4, .LCPI5_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r4
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
+; CHECK-P9-NEXT: mtvsrws v2, r3
+; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r3
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: vextsb2w v2, v2
; CHECK-P9-NEXT: xvcvsxwsp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
-; CHECK-BE-NEXT: mtvsrws v3, r3
-; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r4
-; CHECK-BE-NEXT: vperm v2, v3, v3, v2
+; CHECK-BE-NEXT: mtvsrws v2, r3
+; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r3
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: vextsb2w v2, v2
; CHECK-BE-NEXT: xvcvsxwsp v2, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI6_1@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
-; CHECK-P9-NEXT: addi r5, r5, .LCPI6_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI6_1@toc@l
-; CHECK-P9-NEXT: xxswapd v4, vs0
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: vperm v2, v4, v4, v2
-; CHECK-P9-NEXT: vperm v3, v4, v4, v3
-; CHECK-P9-NEXT: vextsb2w v2, v2
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-P9-NEXT: vextsb2w v3, v3
-; CHECK-P9-NEXT: xvcvsxwsp vs0, v2
-; CHECK-P9-NEXT: xvcvsxwsp vs1, v3
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: xvcvsxwsp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: vextsb2w v2, v2
+; CHECK-P9-NEXT: xvcvsxwsp vs1, v2
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI6_1@toc@ha
-; CHECK-BE-NEXT: mtvsrd v4, r4
-; CHECK-BE-NEXT: xxlxor v5, v5, v5
-; CHECK-BE-NEXT: addi r5, r5, .LCPI6_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI6_1@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: vperm v2, v5, v4, v2
-; CHECK-BE-NEXT: vperm v3, v4, v4, v3
-; CHECK-BE-NEXT: vextsb2w v2, v2
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-NEXT: vperm v3, v3, v2, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha
; CHECK-BE-NEXT: vextsb2w v3, v3
-; CHECK-BE-NEXT: xvcvsxwsp vs0, v2
-; CHECK-BE-NEXT: xvcvsxwsp vs1, v3
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l
+; CHECK-BE-NEXT: xvcvsxwsp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: vextsb2w v2, v2
+; CHECK-BE-NEXT: xvcvsxwsp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-P9-NEXT: addis r5, r2, .LCPI7_1@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI7_3@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI7_0@toc@l
-; CHECK-P9-NEXT: addi r5, r5, .LCPI7_1@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI7_2@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI7_3@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: lxvx v5, 0, r6
-; CHECK-P9-NEXT: lxvx v0, 0, r7
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_1@toc@l
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
-; CHECK-P9-NEXT: vperm v4, v2, v2, v4
-; CHECK-P9-NEXT: vperm v5, v2, v2, v5
-; CHECK-P9-NEXT: vperm v2, v2, v2, v0
; CHECK-P9-NEXT: vextsb2w v3, v3
-; CHECK-P9-NEXT: vextsb2w v4, v4
-; CHECK-P9-NEXT: vextsb2w v5, v5
-; CHECK-P9-NEXT: vextsb2w v2, v2
; CHECK-P9-NEXT: xvcvsxwsp vs0, v3
-; CHECK-P9-NEXT: xvcvsxwsp vs1, v4
-; CHECK-P9-NEXT: xvcvsxwsp vs2, v5
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: vextsb2w v3, v3
+; CHECK-P9-NEXT: xvcvsxwsp vs1, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: vextsb2w v3, v3
+; CHECK-P9-NEXT: xvcvsxwsp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: vextsb2w v2, v2
; CHECK-P9-NEXT: xvcvsxwsp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT: addis r5, r2, .LCPI7_1@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI7_3@toc@ha
-; CHECK-BE-NEXT: xxlxor v1, v1, v1
; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l
-; CHECK-BE-NEXT: addi r5, r5, .LCPI7_1@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI7_2@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI7_3@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: lxvx v0, 0, r7
-; CHECK-BE-NEXT: vperm v3, v1, v2, v3
-; CHECK-BE-NEXT: vperm v4, v1, v2, v4
-; CHECK-BE-NEXT: vperm v5, v2, v2, v5
-; CHECK-BE-NEXT: vperm v2, v2, v2, v0
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_1@toc@l
; CHECK-BE-NEXT: vextsb2w v3, v3
-; CHECK-BE-NEXT: vextsb2w v4, v4
-; CHECK-BE-NEXT: vextsb2w v5, v5
-; CHECK-BE-NEXT: vextsb2w v2, v2
; CHECK-BE-NEXT: xvcvsxwsp vs0, v3
-; CHECK-BE-NEXT: xvcvsxwsp vs1, v4
-; CHECK-BE-NEXT: xvcvsxwsp vs2, v5
-; CHECK-BE-NEXT: xvcvsxwsp vs3, v2
-; CHECK-BE-NEXT: stxv vs1, 48(r3)
-; CHECK-BE-NEXT: stxv vs3, 32(r3)
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: vextsb2w v3, v3
+; CHECK-BE-NEXT: xvcvsxwsp vs1, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v2, v3
+; CHECK-BE-NEXT: stxv vs1, 48(r3)
+; CHECK-BE-NEXT: vextsb2w v3, v3
+; CHECK-BE-NEXT: xvcvsxwsp vs2, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxv vs2, 0(r3)
+; CHECK-BE-NEXT: vextsb2w v2, v2
+; CHECK-BE-NEXT: xvcvsxwsp vs3, v2
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = sitofp <16 x i8> %a to <16 x float>
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-P9-NEXT: mtvsrws v3, r3
+; CHECK-P9-NEXT: mtvsrws v2, r3
+; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r3
; CHECK-P9-NEXT: xxlxor v4, v4, v4
-; CHECK-P9-NEXT: addi r4, r4, .LCPI0_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r4
-; CHECK-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT: mtvsrws v3, r3
+; CHECK-BE-NEXT: mtvsrws v2, r3
+; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r3
; CHECK-BE-NEXT: xxlxor v4, v4, v4
-; CHECK-BE-NEXT: addi r4, r4, .LCPI0_0@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r4
-; CHECK-BE-NEXT: vperm v2, v3, v4, v2
+; CHECK-BE-NEXT: vperm v2, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxddp v2, v2
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI1_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-P9-NEXT: mtvsrws v4, r4
-; CHECK-P9-NEXT: xxlxor v5, v5, v5
-; CHECK-P9-NEXT: addi r5, r5, .LCPI1_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI1_1@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: vperm v2, v5, v4, v2
-; CHECK-P9-NEXT: vperm v3, v5, v4, v3
-; CHECK-P9-NEXT: xvcvuxddp vs0, v2
-; CHECK-P9-NEXT: xvcvuxddp vs1, v3
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: mtvsrws v2, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI1_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: xvcvuxddp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs1, v2
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-BE-NEXT: mtvsrws v4, r4
-; CHECK-BE-NEXT: xxlxor v5, v5, v5
-; CHECK-BE-NEXT: addi r5, r5, .LCPI1_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI1_1@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: vperm v2, v4, v5, v2
-; CHECK-BE-NEXT: vperm v3, v5, v4, v3
-; CHECK-BE-NEXT: xvcvuxddp vs0, v2
-; CHECK-BE-NEXT: xvcvuxddp vs1, v3
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: mtvsrws v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI1_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI1_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
+; CHECK-BE-NEXT: xvcvuxddp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs1, v2
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i32 %a.coerce to <4 x i8>
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI2_1@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI2_2@toc@ha
-; CHECK-P9-NEXT: addis r8, r2, .LCPI2_3@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
-; CHECK-P9-NEXT: xxlxor v1, v1, v1
-; CHECK-P9-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI2_1@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI2_2@toc@l
-; CHECK-P9-NEXT: addi r8, r8, .LCPI2_3@toc@l
-; CHECK-P9-NEXT: xxswapd v0, vs0
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: lxvx v4, 0, r7
-; CHECK-P9-NEXT: lxvx v5, 0, r8
-; CHECK-P9-NEXT: vperm v2, v1, v0, v2
-; CHECK-P9-NEXT: vperm v3, v1, v0, v3
-; CHECK-P9-NEXT: vperm v4, v1, v0, v4
-; CHECK-P9-NEXT: vperm v5, v1, v0, v5
-; CHECK-P9-NEXT: xvcvuxddp vs0, v2
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: xvcvuxddp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: xvcvuxddp vs1, v3
-; CHECK-P9-NEXT: xvcvuxddp vs2, v4
-; CHECK-P9-NEXT: xvcvuxddp vs3, v5
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI2_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI2_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs3, v2
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI2_1@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI2_2@toc@ha
-; CHECK-BE-NEXT: addis r8, r2, .LCPI2_3@toc@ha
-; CHECK-BE-NEXT: mtvsrd v0, r4
-; CHECK-BE-NEXT: xxlxor v1, v1, v1
-; CHECK-BE-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI2_1@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI2_2@toc@l
-; CHECK-BE-NEXT: addi r8, r8, .LCPI2_3@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: lxvx v4, 0, r7
-; CHECK-BE-NEXT: lxvx v5, 0, r8
-; CHECK-BE-NEXT: vperm v2, v0, v1, v2
-; CHECK-BE-NEXT: vperm v3, v1, v0, v3
-; CHECK-BE-NEXT: vperm v4, v1, v0, v4
-; CHECK-BE-NEXT: vperm v5, v1, v0, v5
-; CHECK-BE-NEXT: xvcvuxddp vs0, v2
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
+; CHECK-BE-NEXT: xvcvuxddp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_2@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: xvcvuxddp vs1, v3
-; CHECK-BE-NEXT: xvcvuxddp vs2, v4
-; CHECK-BE-NEXT: xvcvuxddp vs3, v5
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI2_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI2_3@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs2, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs3, v2
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <8 x i8>
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-P9-NEXT: addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI3_3@toc@ha
-; CHECK-P9-NEXT: addis r8, r2, .LCPI3_4@toc@ha
-; CHECK-P9-NEXT: addis r9, r2, .LCPI3_5@toc@ha
-; CHECK-P9-NEXT: addis r10, r2, .LCPI3_6@toc@ha
-; CHECK-P9-NEXT: addis r11, r2, .LCPI3_7@toc@ha
-; CHECK-P9-NEXT: xxlxor v9, v9, v9
; CHECK-P9-NEXT: addi r4, r4, .LCPI3_0@toc@l
-; CHECK-P9-NEXT: addi r5, r5, .LCPI3_1@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI3_2@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI3_3@toc@l
-; CHECK-P9-NEXT: addi r8, r8, .LCPI3_4@toc@l
-; CHECK-P9-NEXT: addi r9, r9, .LCPI3_5@toc@l
-; CHECK-P9-NEXT: addi r10, r10, .LCPI3_6@toc@l
-; CHECK-P9-NEXT: addi r11, r11, .LCPI3_7@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: lxvx v5, 0, r6
-; CHECK-P9-NEXT: lxvx v0, 0, r7
-; CHECK-P9-NEXT: lxvx v1, 0, r8
-; CHECK-P9-NEXT: lxvx v6, 0, r9
-; CHECK-P9-NEXT: lxvx v7, 0, r10
-; CHECK-P9-NEXT: lxvx v8, 0, r11
-; CHECK-P9-NEXT: vperm v3, v9, v2, v3
-; CHECK-P9-NEXT: vperm v4, v9, v2, v4
-; CHECK-P9-NEXT: vperm v5, v9, v2, v5
-; CHECK-P9-NEXT: vperm v0, v9, v2, v0
-; CHECK-P9-NEXT: vperm v1, v9, v2, v1
-; CHECK-P9-NEXT: vperm v6, v9, v2, v6
-; CHECK-P9-NEXT: vperm v7, v9, v2, v7
-; CHECK-P9-NEXT: vperm v2, v9, v2, v8
+; CHECK-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: xvcvuxddp vs0, v3
-; CHECK-P9-NEXT: xvcvuxddp vs1, v4
-; CHECK-P9-NEXT: xvcvuxddp vs2, v5
-; CHECK-P9-NEXT: xvcvuxddp vs3, v0
-; CHECK-P9-NEXT: xvcvuxddp vs4, v1
-; CHECK-P9-NEXT: xvcvuxddp vs5, v6
-; CHECK-P9-NEXT: xvcvuxddp vs6, v7
-; CHECK-P9-NEXT: xvcvuxddp vs7, v2
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs7, 112(r3)
-; CHECK-P9-NEXT: stxv vs6, 96(r3)
-; CHECK-P9-NEXT: stxv vs5, 80(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs1, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_4@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_4@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs3, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_5@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_5@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs4, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_6@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_6@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs5, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI3_7@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI3_7@toc@l
+; CHECK-P9-NEXT: vperm v3, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs5, 80(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs6, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-P9-NEXT: stxv vs6, 96(r3)
+; CHECK-P9-NEXT: xvcvuxddp vs7, v2
+; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT: addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI3_3@toc@ha
-; CHECK-BE-NEXT: addis r8, r2, .LCPI3_4@toc@ha
-; CHECK-BE-NEXT: addis r9, r2, .LCPI3_5@toc@ha
-; CHECK-BE-NEXT: addis r10, r2, .LCPI3_6@toc@ha
-; CHECK-BE-NEXT: addis r11, r2, .LCPI3_7@toc@ha
-; CHECK-BE-NEXT: xxlxor v9, v9, v9
; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l
-; CHECK-BE-NEXT: addi r5, r5, .LCPI3_1@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI3_2@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI3_3@toc@l
-; CHECK-BE-NEXT: addi r8, r8, .LCPI3_4@toc@l
-; CHECK-BE-NEXT: addi r9, r9, .LCPI3_5@toc@l
-; CHECK-BE-NEXT: addi r10, r10, .LCPI3_6@toc@l
-; CHECK-BE-NEXT: addi r11, r11, .LCPI3_7@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: lxvx v0, 0, r7
-; CHECK-BE-NEXT: lxvx v1, 0, r8
-; CHECK-BE-NEXT: lxvx v6, 0, r9
-; CHECK-BE-NEXT: lxvx v7, 0, r10
-; CHECK-BE-NEXT: lxvx v8, 0, r11
-; CHECK-BE-NEXT: vperm v3, v2, v9, v3
-; CHECK-BE-NEXT: vperm v4, v9, v2, v4
-; CHECK-BE-NEXT: vperm v5, v9, v2, v5
-; CHECK-BE-NEXT: vperm v0, v9, v2, v0
-; CHECK-BE-NEXT: vperm v1, v9, v2, v1
-; CHECK-BE-NEXT: vperm v6, v9, v2, v6
-; CHECK-BE-NEXT: vperm v7, v9, v2, v7
-; CHECK-BE-NEXT: vperm v2, v9, v2, v8
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v4, v3
; CHECK-BE-NEXT: xvcvuxddp vs0, v3
-; CHECK-BE-NEXT: xvcvuxddp vs1, v4
-; CHECK-BE-NEXT: xvcvuxddp vs2, v5
-; CHECK-BE-NEXT: xvcvuxddp vs3, v0
-; CHECK-BE-NEXT: xvcvuxddp vs4, v1
-; CHECK-BE-NEXT: xvcvuxddp vs5, v6
-; CHECK-BE-NEXT: xvcvuxddp vs6, v7
-; CHECK-BE-NEXT: xvcvuxddp vs7, v2
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_2@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
; CHECK-BE-NEXT: stxv vs0, 0(r3)
-; CHECK-BE-NEXT: stxv vs7, 112(r3)
-; CHECK-BE-NEXT: stxv vs6, 96(r3)
-; CHECK-BE-NEXT: stxv vs5, 80(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs1, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_3@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs2, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_4@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_4@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs3, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_5@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_5@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs4, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_6@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_6@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
; CHECK-BE-NEXT: stxv vs4, 64(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs5, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI3_7@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI3_7@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs5, 80(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs6, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs6, 96(r3)
+; CHECK-BE-NEXT: xvcvuxddp vs7, v2
+; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <16 x i8> %a to <16 x double>
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r4, r2, .LCPI4_0@toc@ha
-; CHECK-P9-NEXT: mtvsrws v3, r3
-; CHECK-P9-NEXT: addi r4, r4, .LCPI4_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r4
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
+; CHECK-P9-NEXT: mtvsrws v2, r3
+; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r3
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: vextsb2d v2, v2
; CHECK-P9-NEXT: xvcvsxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r4, r2, .LCPI4_0@toc@ha
-; CHECK-BE-NEXT: mtvsrws v3, r3
-; CHECK-BE-NEXT: addi r4, r4, .LCPI4_0@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r4
-; CHECK-BE-NEXT: vperm v2, v3, v3, v2
+; CHECK-BE-NEXT: mtvsrws v2, r3
+; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r3
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: vextsb2d v2, v2
; CHECK-BE-NEXT: xvcvsxddp v2, v2
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI5_1@toc@ha
-; CHECK-P9-NEXT: mtvsrws v4, r4
-; CHECK-P9-NEXT: addi r5, r5, .LCPI5_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI5_1@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: vperm v2, v4, v4, v2
-; CHECK-P9-NEXT: vperm v3, v4, v4, v3
-; CHECK-P9-NEXT: vextsb2d v2, v2
+; CHECK-P9-NEXT: mtvsrws v2, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI5_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI5_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI5_1@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: vextsb2d v3, v3
-; CHECK-P9-NEXT: xvcvsxddp vs0, v2
-; CHECK-P9-NEXT: xvcvsxddp vs1, v3
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: xvcvsxddp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: vextsb2d v2, v2
+; CHECK-P9-NEXT: xvcvsxddp vs1, v2
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI5_1@toc@ha
-; CHECK-BE-NEXT: mtvsrws v4, r4
-; CHECK-BE-NEXT: xxlxor v5, v5, v5
-; CHECK-BE-NEXT: addi r5, r5, .LCPI5_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI5_1@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: vperm v2, v5, v4, v2
-; CHECK-BE-NEXT: vperm v3, v4, v4, v3
-; CHECK-BE-NEXT: vextsb2d v2, v2
+; CHECK-BE-NEXT: mtvsrws v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-NEXT: vperm v3, v3, v2, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-BE-NEXT: vextsb2d v3, v3
-; CHECK-BE-NEXT: xvcvsxddp vs0, v2
-; CHECK-BE-NEXT: xvcvsxddp vs1, v3
+; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l
+; CHECK-BE-NEXT: xvcvsxddp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: vextsb2d v2, v2
+; CHECK-BE-NEXT: xvcvsxddp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI6_1@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI6_2@toc@ha
-; CHECK-P9-NEXT: addis r8, r2, .LCPI6_3@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
-; CHECK-P9-NEXT: addi r5, r5, .LCPI6_0@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI6_1@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI6_2@toc@l
-; CHECK-P9-NEXT: addi r8, r8, .LCPI6_3@toc@l
-; CHECK-P9-NEXT: xxswapd v0, vs0
-; CHECK-P9-NEXT: lxvx v2, 0, r5
-; CHECK-P9-NEXT: lxvx v3, 0, r6
-; CHECK-P9-NEXT: lxvx v4, 0, r7
-; CHECK-P9-NEXT: lxvx v5, 0, r8
-; CHECK-P9-NEXT: vperm v2, v0, v0, v2
-; CHECK-P9-NEXT: vperm v3, v0, v0, v3
-; CHECK-P9-NEXT: vperm v4, v0, v0, v4
-; CHECK-P9-NEXT: vperm v5, v0, v0, v5
-; CHECK-P9-NEXT: vextsb2d v2, v2
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_1@toc@l
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs0, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: vextsb2d v3, v3
-; CHECK-P9-NEXT: vextsb2d v4, v4
-; CHECK-P9-NEXT: vextsb2d v5, v5
-; CHECK-P9-NEXT: xvcvsxddp vs0, v2
; CHECK-P9-NEXT: xvcvsxddp vs1, v3
-; CHECK-P9-NEXT: xvcvsxddp vs2, v4
-; CHECK-P9-NEXT: xvcvsxddp vs3, v5
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI6_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI6_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: vextsb2d v2, v2
+; CHECK-P9-NEXT: xvcvsxddp vs3, v2
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI6_1@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI6_2@toc@ha
-; CHECK-BE-NEXT: addis r8, r2, .LCPI6_3@toc@ha
-; CHECK-BE-NEXT: mtvsrd v0, r4
-; CHECK-BE-NEXT: xxlxor v1, v1, v1
-; CHECK-BE-NEXT: addi r5, r5, .LCPI6_0@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI6_1@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI6_2@toc@l
-; CHECK-BE-NEXT: addi r8, r8, .LCPI6_3@toc@l
-; CHECK-BE-NEXT: lxvx v2, 0, r5
-; CHECK-BE-NEXT: lxvx v3, 0, r6
-; CHECK-BE-NEXT: lxvx v4, 0, r7
-; CHECK-BE-NEXT: lxvx v5, 0, r8
-; CHECK-BE-NEXT: vperm v2, v1, v0, v2
-; CHECK-BE-NEXT: vperm v3, v1, v0, v3
-; CHECK-BE-NEXT: vperm v4, v0, v0, v4
-; CHECK-BE-NEXT: vperm v5, v0, v0, v5
-; CHECK-BE-NEXT: vextsb2d v2, v2
+; CHECK-BE-NEXT: mtvsrd v2, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-BE-NEXT: vextsb2d v3, v3
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l
+; CHECK-BE-NEXT: xvcvsxddp vs0, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_2@toc@l
+; CHECK-BE-NEXT: vperm v3, v4, v2, v3
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: vextsb2d v3, v3
-; CHECK-BE-NEXT: vextsb2d v4, v4
-; CHECK-BE-NEXT: vextsb2d v5, v5
-; CHECK-BE-NEXT: xvcvsxddp vs0, v2
; CHECK-BE-NEXT: xvcvsxddp vs1, v3
-; CHECK-BE-NEXT: xvcvsxddp vs2, v4
-; CHECK-BE-NEXT: xvcvsxddp vs3, v5
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI6_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI6_3@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v2, v3
; CHECK-BE-NEXT: stxv vs1, 48(r3)
-; CHECK-BE-NEXT: stxv vs3, 32(r3)
-; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: vextsb2d v3, v3
+; CHECK-BE-NEXT: xvcvsxddp vs2, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxv vs2, 0(r3)
+; CHECK-BE-NEXT: vextsb2d v2, v2
+; CHECK-BE-NEXT: xvcvsxddp vs3, v2
+; CHECK-BE-NEXT: stxv vs3, 32(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <8 x i8>
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-P9-NEXT: addis r5, r2, .LCPI7_1@toc@ha
-; CHECK-P9-NEXT: addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-P9-NEXT: addis r7, r2, .LCPI7_3@toc@ha
-; CHECK-P9-NEXT: addis r8, r2, .LCPI7_4@toc@ha
-; CHECK-P9-NEXT: addis r9, r2, .LCPI7_5@toc@ha
-; CHECK-P9-NEXT: addis r10, r2, .LCPI7_6@toc@ha
-; CHECK-P9-NEXT: addis r11, r2, .LCPI7_7@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI7_0@toc@l
-; CHECK-P9-NEXT: addi r5, r5, .LCPI7_1@toc@l
-; CHECK-P9-NEXT: addi r6, r6, .LCPI7_2@toc@l
-; CHECK-P9-NEXT: addi r7, r7, .LCPI7_3@toc@l
-; CHECK-P9-NEXT: addi r8, r8, .LCPI7_4@toc@l
-; CHECK-P9-NEXT: addi r9, r9, .LCPI7_5@toc@l
-; CHECK-P9-NEXT: addi r10, r10, .LCPI7_6@toc@l
-; CHECK-P9-NEXT: addi r11, r11, .LCPI7_7@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
-; CHECK-P9-NEXT: lxvx v4, 0, r5
-; CHECK-P9-NEXT: lxvx v5, 0, r6
-; CHECK-P9-NEXT: lxvx v0, 0, r7
-; CHECK-P9-NEXT: lxvx v1, 0, r8
-; CHECK-P9-NEXT: lxvx v6, 0, r9
-; CHECK-P9-NEXT: lxvx v7, 0, r10
-; CHECK-P9-NEXT: lxvx v8, 0, r11
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_1@toc@l
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
-; CHECK-P9-NEXT: vperm v4, v2, v2, v4
-; CHECK-P9-NEXT: vperm v5, v2, v2, v5
-; CHECK-P9-NEXT: vperm v0, v2, v2, v0
-; CHECK-P9-NEXT: vperm v1, v2, v2, v1
-; CHECK-P9-NEXT: vperm v6, v2, v2, v6
-; CHECK-P9-NEXT: vperm v7, v2, v2, v7
-; CHECK-P9-NEXT: vperm v2, v2, v2, v8
; CHECK-P9-NEXT: vextsb2d v3, v3
-; CHECK-P9-NEXT: vextsb2d v4, v4
-; CHECK-P9-NEXT: vextsb2d v5, v5
-; CHECK-P9-NEXT: vextsb2d v0, v0
-; CHECK-P9-NEXT: vextsb2d v1, v1
-; CHECK-P9-NEXT: vextsb2d v6, v6
-; CHECK-P9-NEXT: vextsb2d v7, v7
-; CHECK-P9-NEXT: vextsb2d v2, v2
; CHECK-P9-NEXT: xvcvsxddp vs0, v3
-; CHECK-P9-NEXT: xvcvsxddp vs1, v4
-; CHECK-P9-NEXT: xvcvsxddp vs2, v5
-; CHECK-P9-NEXT: xvcvsxddp vs3, v0
-; CHECK-P9-NEXT: xvcvsxddp vs4, v1
-; CHECK-P9-NEXT: xvcvsxddp vs5, v6
-; CHECK-P9-NEXT: xvcvsxddp vs6, v7
-; CHECK-P9-NEXT: xvcvsxddp vs7, v2
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_2@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: stxv vs0, 0(r3)
-; CHECK-P9-NEXT: stxv vs7, 112(r3)
-; CHECK-P9-NEXT: stxv vs6, 96(r3)
-; CHECK-P9-NEXT: stxv vs5, 80(r3)
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs1, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_3@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs2, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_4@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_4@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs3, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_5@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_5@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs4, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_6@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_6@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs5, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: addis r4, r2, .LCPI7_7@toc@ha
+; CHECK-P9-NEXT: addi r4, r4, .LCPI7_7@toc@l
+; CHECK-P9-NEXT: vperm v3, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs5, 80(r3)
+; CHECK-P9-NEXT: vextsb2d v3, v3
+; CHECK-P9-NEXT: xvcvsxddp vs6, v3
+; CHECK-P9-NEXT: lxvx v3, 0, r4
+; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: stxv vs6, 96(r3)
+; CHECK-P9-NEXT: vextsb2d v2, v2
+; CHECK-P9-NEXT: xvcvsxddp vs7, v2
+; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT: addis r5, r2, .LCPI7_1@toc@ha
-; CHECK-BE-NEXT: addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-BE-NEXT: addis r7, r2, .LCPI7_3@toc@ha
-; CHECK-BE-NEXT: addis r8, r2, .LCPI7_4@toc@ha
-; CHECK-BE-NEXT: addis r9, r2, .LCPI7_5@toc@ha
-; CHECK-BE-NEXT: addis r10, r2, .LCPI7_6@toc@ha
-; CHECK-BE-NEXT: addis r11, r2, .LCPI7_7@toc@ha
-; CHECK-BE-NEXT: xxlxor v9, v9, v9
; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l
-; CHECK-BE-NEXT: addi r5, r5, .LCPI7_1@toc@l
-; CHECK-BE-NEXT: addi r6, r6, .LCPI7_2@toc@l
-; CHECK-BE-NEXT: addi r7, r7, .LCPI7_3@toc@l
-; CHECK-BE-NEXT: addi r8, r8, .LCPI7_4@toc@l
-; CHECK-BE-NEXT: addi r9, r9, .LCPI7_5@toc@l
-; CHECK-BE-NEXT: addi r10, r10, .LCPI7_6@toc@l
-; CHECK-BE-NEXT: addi r11, r11, .LCPI7_7@toc@l
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-NEXT: vperm v4, v3, v2, v4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_1@toc@l
+; CHECK-BE-NEXT: vextsb2d v4, v4
+; CHECK-BE-NEXT: xvcvsxddp vs0, v4
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l
+; CHECK-BE-NEXT: vperm v4, v3, v2, v4
+; CHECK-BE-NEXT: stxv vs0, 16(r3)
+; CHECK-BE-NEXT: vextsb2d v4, v4
+; CHECK-BE-NEXT: xvcvsxddp vs1, v4
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l
+; CHECK-BE-NEXT: vperm v4, v3, v2, v4
+; CHECK-BE-NEXT: stxv vs1, 48(r3)
+; CHECK-BE-NEXT: vextsb2d v4, v4
+; CHECK-BE-NEXT: xvcvsxddp vs2, v4
+; CHECK-BE-NEXT: lxvx v4, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_4@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_4@toc@l
+; CHECK-BE-NEXT: vperm v3, v3, v2, v4
+; CHECK-BE-NEXT: stxv vs2, 80(r3)
+; CHECK-BE-NEXT: vextsb2d v3, v3
+; CHECK-BE-NEXT: xvcvsxddp vs3, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_5@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_5@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v2, v3
+; CHECK-BE-NEXT: stxv vs3, 112(r3)
+; CHECK-BE-NEXT: vextsb2d v3, v3
+; CHECK-BE-NEXT: xvcvsxddp vs4, v3
; CHECK-BE-NEXT: lxvx v3, 0, r4
-; CHECK-BE-NEXT: lxvx v4, 0, r5
-; CHECK-BE-NEXT: lxvx v5, 0, r6
-; CHECK-BE-NEXT: lxvx v0, 0, r7
-; CHECK-BE-NEXT: lxvx v1, 0, r8
-; CHECK-BE-NEXT: lxvx v6, 0, r9
-; CHECK-BE-NEXT: lxvx v7, 0, r10
-; CHECK-BE-NEXT: lxvx v8, 0, r11
-; CHECK-BE-NEXT: vperm v3, v9, v2, v3
-; CHECK-BE-NEXT: vperm v4, v9, v2, v4
-; CHECK-BE-NEXT: vperm v5, v9, v2, v5
-; CHECK-BE-NEXT: vperm v0, v9, v2, v0
-; CHECK-BE-NEXT: vperm v1, v2, v2, v1
-; CHECK-BE-NEXT: vperm v6, v2, v2, v6
-; CHECK-BE-NEXT: vperm v7, v2, v2, v7
-; CHECK-BE-NEXT: vperm v2, v2, v2, v8
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_6@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_6@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v2, v3
+; CHECK-BE-NEXT: stxv vs4, 0(r3)
; CHECK-BE-NEXT: vextsb2d v3, v3
-; CHECK-BE-NEXT: vextsb2d v4, v4
-; CHECK-BE-NEXT: vextsb2d v5, v5
-; CHECK-BE-NEXT: vextsb2d v0, v0
-; CHECK-BE-NEXT: vextsb2d v1, v1
-; CHECK-BE-NEXT: vextsb2d v6, v6
-; CHECK-BE-NEXT: vextsb2d v7, v7
+; CHECK-BE-NEXT: xvcvsxddp vs5, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: addis r4, r2, .LCPI7_7@toc@ha
+; CHECK-BE-NEXT: addi r4, r4, .LCPI7_7@toc@l
+; CHECK-BE-NEXT: vperm v3, v2, v2, v3
+; CHECK-BE-NEXT: stxv vs5, 32(r3)
+; CHECK-BE-NEXT: vextsb2d v3, v3
+; CHECK-BE-NEXT: xvcvsxddp vs6, v3
+; CHECK-BE-NEXT: lxvx v3, 0, r4
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
+; CHECK-BE-NEXT: stxv vs6, 64(r3)
; CHECK-BE-NEXT: vextsb2d v2, v2
-; CHECK-BE-NEXT: xvcvsxddp vs0, v3
-; CHECK-BE-NEXT: xvcvsxddp vs1, v4
-; CHECK-BE-NEXT: xvcvsxddp vs2, v5
-; CHECK-BE-NEXT: xvcvsxddp vs3, v0
-; CHECK-BE-NEXT: xvcvsxddp vs4, v1
-; CHECK-BE-NEXT: xvcvsxddp vs5, v6
-; CHECK-BE-NEXT: xvcvsxddp vs6, v7
; CHECK-BE-NEXT: xvcvsxddp vs7, v2
-; CHECK-BE-NEXT: stxv vs3, 112(r3)
-; CHECK-BE-NEXT: stxv vs2, 80(r3)
-; CHECK-BE-NEXT: stxv vs1, 48(r3)
-; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs7, 96(r3)
-; CHECK-BE-NEXT: stxv vs6, 64(r3)
-; CHECK-BE-NEXT: stxv vs5, 32(r3)
-; CHECK-BE-NEXT: stxv vs4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = sitofp <16 x i8> %a to <16 x double>
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 48(r4)
-; CHECK-P9-NEXT: lxv v3, 32(r4)
-; CHECK-P9-NEXT: lxv v4, 16(r4)
-; CHECK-P9-NEXT: lxv v5, 0(r4)
-; CHECK-P9-NEXT: lxv v0, 112(r4)
-; CHECK-P9-NEXT: lxv v1, 96(r4)
-; CHECK-P9-NEXT: lxv v6, 80(r4)
-; CHECK-P9-NEXT: lxv v7, 64(r4)
-; CHECK-P9-NEXT: xvcvuxddp vs0, v5
-; CHECK-P9-NEXT: xvcvuxddp vs1, v4
-; CHECK-P9-NEXT: xvcvuxddp vs2, v3
-; CHECK-P9-NEXT: xvcvuxddp vs3, v2
-; CHECK-P9-NEXT: xvcvuxddp vs4, v7
-; CHECK-P9-NEXT: xvcvuxddp vs5, v6
-; CHECK-P9-NEXT: xvcvuxddp vs6, v1
-; CHECK-P9-NEXT: xvcvuxddp vs7, v0
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: lxv v2, 112(r4)
+; CHECK-P9-NEXT: lxv v3, 96(r4)
+; CHECK-P9-NEXT: lxv v4, 80(r4)
+; CHECK-P9-NEXT: lxv v5, 64(r4)
+; CHECK-P9-NEXT: lxv v0, 48(r4)
+; CHECK-P9-NEXT: xvcvuxddp vs3, v0
+; CHECK-P9-NEXT: lxv v1, 32(r4)
+; CHECK-P9-NEXT: lxv v6, 16(r4)
+; CHECK-P9-NEXT: lxv v7, 0(r4)
+; CHECK-P9-NEXT: xvcvuxddp vs0, v7
+; CHECK-P9-NEXT: xvcvuxddp vs1, v6
+; CHECK-P9-NEXT: xvcvuxddp vs2, v1
+; CHECK-P9-NEXT: xvcvuxddp vs4, v5
+; CHECK-P9-NEXT: xvcvuxddp vs5, v4
+; CHECK-P9-NEXT: xvcvuxddp vs6, v3
+; CHECK-P9-NEXT: xvcvuxddp vs7, v2
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 48(r4)
-; CHECK-BE-NEXT: lxv v3, 32(r4)
-; CHECK-BE-NEXT: lxv v4, 16(r4)
-; CHECK-BE-NEXT: lxv v5, 0(r4)
-; CHECK-BE-NEXT: lxv v0, 112(r4)
-; CHECK-BE-NEXT: lxv v1, 96(r4)
-; CHECK-BE-NEXT: lxv v6, 80(r4)
-; CHECK-BE-NEXT: lxv v7, 64(r4)
-; CHECK-BE-NEXT: xvcvuxddp vs0, v5
-; CHECK-BE-NEXT: xvcvuxddp vs1, v4
-; CHECK-BE-NEXT: xvcvuxddp vs2, v3
-; CHECK-BE-NEXT: xvcvuxddp vs3, v2
-; CHECK-BE-NEXT: xvcvuxddp vs4, v7
-; CHECK-BE-NEXT: xvcvuxddp vs5, v6
-; CHECK-BE-NEXT: xvcvuxddp vs6, v1
-; CHECK-BE-NEXT: xvcvuxddp vs7, v0
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: lxv v2, 112(r4)
+; CHECK-BE-NEXT: lxv v3, 96(r4)
+; CHECK-BE-NEXT: lxv v4, 80(r4)
+; CHECK-BE-NEXT: lxv v5, 64(r4)
+; CHECK-BE-NEXT: lxv v0, 48(r4)
+; CHECK-BE-NEXT: xvcvuxddp vs3, v0
+; CHECK-BE-NEXT: lxv v1, 32(r4)
+; CHECK-BE-NEXT: lxv v6, 16(r4)
+; CHECK-BE-NEXT: lxv v7, 0(r4)
+; CHECK-BE-NEXT: xvcvuxddp vs0, v7
+; CHECK-BE-NEXT: xvcvuxddp vs1, v6
+; CHECK-BE-NEXT: xvcvuxddp vs2, v1
+; CHECK-BE-NEXT: xvcvuxddp vs4, v5
+; CHECK-BE-NEXT: xvcvuxddp vs5, v4
+; CHECK-BE-NEXT: xvcvuxddp vs6, v3
+; CHECK-BE-NEXT: xvcvuxddp vs7, v2
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs4, 64(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv v2, 48(r4)
-; CHECK-P9-NEXT: lxv v3, 32(r4)
-; CHECK-P9-NEXT: lxv v4, 16(r4)
-; CHECK-P9-NEXT: lxv v5, 0(r4)
-; CHECK-P9-NEXT: lxv v0, 112(r4)
-; CHECK-P9-NEXT: lxv v1, 96(r4)
-; CHECK-P9-NEXT: lxv v6, 80(r4)
-; CHECK-P9-NEXT: lxv v7, 64(r4)
-; CHECK-P9-NEXT: xvcvsxddp vs0, v5
-; CHECK-P9-NEXT: xvcvsxddp vs1, v4
-; CHECK-P9-NEXT: xvcvsxddp vs2, v3
-; CHECK-P9-NEXT: xvcvsxddp vs3, v2
-; CHECK-P9-NEXT: xvcvsxddp vs4, v7
-; CHECK-P9-NEXT: xvcvsxddp vs5, v6
-; CHECK-P9-NEXT: xvcvsxddp vs6, v1
-; CHECK-P9-NEXT: xvcvsxddp vs7, v0
-; CHECK-P9-NEXT: stxv vs3, 48(r3)
-; CHECK-P9-NEXT: stxv vs2, 32(r3)
-; CHECK-P9-NEXT: stxv vs1, 16(r3)
-; CHECK-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-P9-NEXT: lxv v2, 112(r4)
+; CHECK-P9-NEXT: lxv v3, 96(r4)
+; CHECK-P9-NEXT: lxv v4, 80(r4)
+; CHECK-P9-NEXT: lxv v5, 64(r4)
+; CHECK-P9-NEXT: lxv v0, 48(r4)
+; CHECK-P9-NEXT: xvcvsxddp vs3, v0
+; CHECK-P9-NEXT: lxv v1, 32(r4)
+; CHECK-P9-NEXT: lxv v6, 16(r4)
+; CHECK-P9-NEXT: lxv v7, 0(r4)
+; CHECK-P9-NEXT: xvcvsxddp vs0, v7
+; CHECK-P9-NEXT: xvcvsxddp vs1, v6
+; CHECK-P9-NEXT: xvcvsxddp vs2, v1
+; CHECK-P9-NEXT: xvcvsxddp vs4, v5
+; CHECK-P9-NEXT: xvcvsxddp vs5, v4
+; CHECK-P9-NEXT: xvcvsxddp vs6, v3
+; CHECK-P9-NEXT: xvcvsxddp vs7, v2
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
+; CHECK-P9-NEXT: stxv vs3, 48(r3)
+; CHECK-P9-NEXT: stxv vs2, 32(r3)
+; CHECK-P9-NEXT: stxv vs1, 16(r3)
+; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 48(r4)
-; CHECK-BE-NEXT: lxv v3, 32(r4)
-; CHECK-BE-NEXT: lxv v4, 16(r4)
-; CHECK-BE-NEXT: lxv v5, 0(r4)
-; CHECK-BE-NEXT: lxv v0, 112(r4)
-; CHECK-BE-NEXT: lxv v1, 96(r4)
-; CHECK-BE-NEXT: lxv v6, 80(r4)
-; CHECK-BE-NEXT: lxv v7, 64(r4)
-; CHECK-BE-NEXT: xvcvsxddp vs0, v5
-; CHECK-BE-NEXT: xvcvsxddp vs1, v4
-; CHECK-BE-NEXT: xvcvsxddp vs2, v3
-; CHECK-BE-NEXT: xvcvsxddp vs3, v2
-; CHECK-BE-NEXT: xvcvsxddp vs4, v7
-; CHECK-BE-NEXT: xvcvsxddp vs5, v6
-; CHECK-BE-NEXT: xvcvsxddp vs6, v1
-; CHECK-BE-NEXT: xvcvsxddp vs7, v0
-; CHECK-BE-NEXT: stxv vs3, 48(r3)
-; CHECK-BE-NEXT: stxv vs2, 32(r3)
-; CHECK-BE-NEXT: stxv vs1, 16(r3)
-; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: lxv v2, 112(r4)
+; CHECK-BE-NEXT: lxv v3, 96(r4)
+; CHECK-BE-NEXT: lxv v4, 80(r4)
+; CHECK-BE-NEXT: lxv v5, 64(r4)
+; CHECK-BE-NEXT: lxv v0, 48(r4)
+; CHECK-BE-NEXT: xvcvsxddp vs3, v0
+; CHECK-BE-NEXT: lxv v1, 32(r4)
+; CHECK-BE-NEXT: lxv v6, 16(r4)
+; CHECK-BE-NEXT: lxv v7, 0(r4)
+; CHECK-BE-NEXT: xvcvsxddp vs0, v7
+; CHECK-BE-NEXT: xvcvsxddp vs1, v6
+; CHECK-BE-NEXT: xvcvsxddp vs2, v1
+; CHECK-BE-NEXT: xvcvsxddp vs4, v5
+; CHECK-BE-NEXT: xvcvsxddp vs5, v4
+; CHECK-BE-NEXT: xvcvsxddp vs6, v3
+; CHECK-BE-NEXT: xvcvsxddp vs7, v2
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs4, 64(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
%add.i = add <16 x i8> %1, %0
tail call void (...) @sink(<16 x i8> %add.i)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i22 = add <16 x i8> %3, %2
tail call void (...) @sink(<16 x i8> %add.i22)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i21 = add <8 x i16> %5, %4
tail call void (...) @sink(<8 x i16> %add.i21)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i20 = add <8 x i16> %7, %6
tail call void (...) @sink(<8 x i16> %add.i20)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i19 = add <4 x i32> %9, %8
tail call void (...) @sink(<4 x i32> %add.i19)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i18 = add <4 x i32> %11, %10
tail call void (...) @sink(<4 x i32> %add.i18)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i17 = add <2 x i64> %13, %12
tail call void (...) @sink(<2 x i64> %add.i17)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i16 = add <2 x i64> %15, %14
tail call void (...) @sink(<2 x i64> %add.i16)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i15 = add <1 x i128> %17, %16
tail call void (...) @sink(<1 x i128> %add.i15)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i14 = add <1 x i128> %19, %18
tail call void (...) @sink(<1 x i128> %add.i14)
; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
%add.i13 = fadd <4 x float> %20, %21
tail call void (...) @sink(<4 x float> %add.i13)
; CHECK: lxvx 0, 0, 3
-; CHECK: lxvx 1, 0, 4
+; CHECK: lxvx 1, 0, 3
; CHECK: xvaddsp 34, 0, 1
; CHECK: stxv 34,
; CHECK: bl sink
%add.i12 = fadd <2 x double> %22, %23
tail call void (...) @sink(<2 x double> %add.i12)
; CHECK: lxvx 0, 0, 3
-; CHECK: lxvx 1, 0, 4
+; CHECK: lxvx 1, 0, 3
; CHECK: xvadddp 0, 0, 1
; CHECK: stxv 0,
; CHECK: bl sink
; CHECK-P9-REG-LABEL: foo3
; CHECK-P9-REG: stdu r1, -400(r1)
-; CHECK-P9-REG: lfd f30, 384(r1)
-; CHECK-P9-REG: xsadddp f1, f0, f0
+; CHECK-P9-REG-DAG: lfd f30, 384(r1)
+; CHECK-P9-REG-DAG: xsadddp f1, f0, f0
; CHECK-P9-FISL-LABEL: foo3
; CHECK-P9-FISL: stdu r1, -400(r1)
;
; CHECK-P9-LABEL: testi0:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lfd f0, 0(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
-; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1
+; CHECK-P9-NEXT: lfd [[REG:f[0-9]+]], 0(r4)
+; CHECK-P9-NEXT: lxv [[REG1:vs[0-9]+]], 0(r3)
+; CHECK-P9-NEXT: xxpermdi [[REG2:vs[0-9]+]], [[REG]], [[REG]], 2
+; CHECK-P9-NEXT: xxpermdi v2, [[REG1]], [[REG2]], 1
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%s = load double, double* %p2
;
; CHECK-P9-LABEL: testi1:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lfd f0, 0(r4)
-; CHECK-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
-; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-P9-NEXT: lfd [[REG:f[0-9]+]], 0(r4)
+; CHECK-P9-NEXT: lxv [[REG1:vs[0-9]+]], 0(r3)
+; CHECK-P9-NEXT: xxpermdi [[REG2:vs[0-9]+]], [[REG]], [[REG]], 2
+; CHECK-P9-NEXT: xxmrgld v2, [[REG2]], [[REG1]]
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%s = load double, double* %p2