+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: < %s | FileCheck %s
; CHECK-NEXT: xvnegsp v0, v1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader
-; CHECK: lfd f0, 0(r3)
+; CHECK-NEXT: #
+; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxpermdi v1, f0, f0, 2
; CHECK-NEXT: vperm v6, v1, v3, v4
; CHECK-NEXT: vperm v1, v3, v1, v2
; P9BE-NEXT: xvnegsp v0, v1
; P9BE-NEXT: .p2align 4
; P9BE-NEXT: .LBB0_1: # %for.cond1.preheader
-; P9BE: lfd f0, 0(r3)
+; P9BE-NEXT: #
+; P9BE-NEXT: lfd f0, 0(r3)
; P9BE-NEXT: xxlor v1, vs0, vs0
; P9BE-NEXT: vperm v6, v3, v1, v4
; P9BE-NEXT: vperm v1, v3, v1, v2
;}
; test32: takes a byte pointer and a sign-extended 32-bit offset and ends by
; storing a <4 x i32> shuffle result through an undef pointer. Only part of
; the IR body is visible in this excerpt (the definition of %12 is not shown).
; The expected code for both assertion sets contains two lfiwzx loads; the
; hand-written assertions being removed only required those two loads and
; forbade lwzux and mtvsrws, while the generated ones pin the full sequence.
; NOTE(review): regenerate the assertions with the update script instead of
; editing them by hand.
define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
+; CHECK-LABEL: test32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: add r5, r3, r4
+; CHECK-NEXT: lfiwzx f0, r3, r4
+; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NEXT: lxvx v4, 0, r3
+; CHECK-NEXT: li r3, 4
+; CHECK-NEXT: xxpermdi v2, f0, f0, 2
+; CHECK-NEXT: lfiwzx f0, r5, r3
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: vperm v2, v2, v3, v4
+; CHECK-NEXT: xxpermdi v5, f0, f0, 2
+; CHECK-NEXT: vperm v3, v5, v3, v4
+; CHECK-NEXT: vspltisw v4, 8
+; CHECK-NEXT: vnegw v3, v3
+; CHECK-NEXT: vadduwm v4, v4, v4
+; CHECK-NEXT: vslw v3, v3, v4
+; CHECK-NEXT: vsubuwm v2, v3, v2
+; CHECK-NEXT: xxswapd vs0, v2
+; CHECK-NEXT: stxvx vs0, 0, r3
+; CHECK-NEXT: blr
+;
+; P9BE-LABEL: test32:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: add r5, r3, r4
+; P9BE-NEXT: lfiwzx f0, r3, r4
+; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; P9BE-NEXT: lxvx v4, 0, r3
+; P9BE-NEXT: li r3, 4
+; P9BE-NEXT: xxsldwi v2, f0, f0, 1
+; P9BE-NEXT: lfiwzx f0, r5, r3
+; P9BE-NEXT: xxlxor v3, v3, v3
+; P9BE-NEXT: vperm v2, v3, v2, v4
+; P9BE-NEXT: xxsldwi v5, f0, f0, 1
+; P9BE-NEXT: vperm v3, v3, v5, v4
+; P9BE-NEXT: vspltisw v4, 8
+; P9BE-NEXT: vnegw v3, v3
+; P9BE-NEXT: vadduwm v4, v4, v4
+; P9BE-NEXT: vslw v3, v3, v4
+; P9BE-NEXT: vsubuwm v2, v3, v2
+; P9BE-NEXT: xxswapd vs0, v2
+; P9BE-NEXT: stxvx vs0, 0, r3
+; P9BE-NEXT: blr
entry:
%idx.ext63 = sext i32 %i_pix2 to i64
%add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63
; Mask <0, 1, 6, 7>: lanes 0-1 come from %12, lanes 2-3 from the undef
; second operand.
%13 = shufflevector <4 x i32> %12, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
store <4 x i32> %13, <4 x i32>* undef, align 16
ret void
-; CHECK-LABEL: test32:
-; CHECK-NOT: lwzux
-; CHECK-NOT: mtvsrws
-; CHECK: lfiwzx
-; CHECK: lfiwzx
-; P9BE-CHECK-LABEL: test32:
-; P9BE-CHECK-NOT: lwzux
-; P9BE-CHECK-NOT: mtvsrws
-; P9BE-CHECK: lfiwzx
-; P9BE-CHECK: lfiwzx
}
; test16: takes a pointer to 16-bit elements plus two sign-extended 32-bit
; arguments (%delta, %thresh). Only the start of the entry block and the
; returning if.end block are visible in this excerpt; the rest of the IR body
; is not shown.
; The expected code for both assertion sets contains two lxsihzx loads; the
; hand-written assertions being removed only required those two loads and
; forbade lhzux, while the generated ones pin the full sequence.
; NOTE(review): regenerate the assertions with the update script instead of
; editing them by hand.
define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
+; CHECK-LABEL: test16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi r4, r4, 1
+; CHECK-NEXT: lxsihzx v2, r3, r4
+; CHECK-NEXT: vsplth v2, v2, 3
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: vmrglh v2, v3, v2
+; CHECK-NEXT: vsplth v4, v3, 7
+; CHECK-NEXT: add r6, r3, r4
+; CHECK-NEXT: li r3, 16
+; CHECK-NEXT: vmrglw v2, v2, v4
+; CHECK-NEXT: lxsihzx v4, r6, r3
+; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; CHECK-NEXT: vsplth v4, v4, 3
+; CHECK-NEXT: vmrglh v3, v3, v4
+; CHECK-NEXT: lxvx v4, 0, r3
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: vperm v2, v3, v2, v4
+; CHECK-NEXT: xxspltw v3, v2, 2
+; CHECK-NEXT: vadduwm v2, v2, v3
+; CHECK-NEXT: vextuwrx r3, r3, v2
+; CHECK-NEXT: cmpw cr0, r3, r5
+; CHECK-NEXT: bgelr+ cr0
+; CHECK-NEXT: # %bb.1: # %if.then
+;
+; P9BE-LABEL: test16:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: sldi r4, r4, 1
+; P9BE-NEXT: add r6, r3, r4
+; P9BE-NEXT: li r7, 16
+; P9BE-NEXT: lxsihzx v2, r6, r7
+; P9BE-NEXT: vsplth v2, v2, 3
+; P9BE-NEXT: lxsihzx v4, r3, r4
+; P9BE-NEXT: li r6, 0
+; P9BE-NEXT: sldi r6, r6, 48
+; P9BE-NEXT: mtvsrd v3, r6
+; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; P9BE-NEXT: vmrghh v2, v3, v2
+; P9BE-NEXT: vsplth v4, v4, 3
+; P9BE-NEXT: vmrghh v4, v3, v4
+; P9BE-NEXT: vsplth v3, v3, 0
+; P9BE-NEXT: vmrghw v3, v3, v4
+; P9BE-NEXT: lxvx v4, 0, r3
+; P9BE-NEXT: li r3, 0
+; P9BE-NEXT: vperm v2, v3, v2, v4
+; P9BE-NEXT: xxspltw v3, v2, 1
+; P9BE-NEXT: vadduwm v2, v2, v3
+; P9BE-NEXT: vextuwlx r3, r3, v2
+; P9BE-NEXT: cmpw cr0, r3, r5
+; P9BE-NEXT: bgelr+ cr0
+; P9BE-NEXT: # %bb.1: # %if.then
entry:
%idxprom = sext i32 %delta to i64
%add14 = add nsw i32 %delta, 8
if.end: ; preds = %for.body
ret void
-; CHECK-LABEL: test16:
-; CHECK-NOT: lhzux
-; CHECK: lxsihzx
-; CHECK: lxsihzx
-; P9BE-CHECK-LABEL: test16:
-; P9BE-CHECK-NOT: lhzux
-; P9BE-CHECK: lxsihzx
-; P9BE-CHECK: lxsihzx
}
; test8: takes a byte pointer plus two sign-extended 32-bit arguments
; (%delta, %thresh). Only the start of the entry block and the returning
; if.end block are visible in this excerpt; the rest of the IR body is not
; shown.
; The expected code for both assertion sets contains two lxsibzx loads; the
; hand-written assertions being removed only required those two loads and
; forbade lbzux, while the generated ones pin the full sequence.
; NOTE(review): regenerate the assertions with the update script instead of
; editing them by hand.
define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
+; CHECK-LABEL: test8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxsibzx v2, r3, r4
+; CHECK-NEXT: add r6, r3, r4
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: mtvsrd f0, r3
+; CHECK-NEXT: li r3, 8
+; CHECK-NEXT: xxswapd v3, vs0
+; CHECK-NEXT: vspltb v2, v2, 7
+; CHECK-NEXT: lxsibzx v5, r6, r3
+; CHECK-NEXT: vspltb v5, v5, 7
+; CHECK-NEXT: vmrglb v2, v3, v2
+; CHECK-NEXT: vspltb v4, v3, 15
+; CHECK-NEXT: vmrglb v3, v3, v5
+; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NEXT: vmrglh v2, v2, v4
+; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-NEXT: vmrglw v2, v2, v4
+; CHECK-NEXT: vmrglh v3, v3, v4
+; CHECK-NEXT: vmrglw v3, v4, v3
+; CHECK-NEXT: lxvx v4, 0, r3
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: vperm v2, v3, v2, v4
+; CHECK-NEXT: xxspltw v3, v2, 2
+; CHECK-NEXT: vadduwm v2, v2, v3
+; CHECK-NEXT: vextuwrx r3, r3, v2
+; CHECK-NEXT: cmpw cr0, r3, r5
+; CHECK-NEXT: bgelr+ cr0
+; CHECK-NEXT: # %bb.1: # %if.then
+;
+; P9BE-LABEL: test8:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: add r6, r3, r4
+; P9BE-NEXT: li r7, 8
+; P9BE-NEXT: lxsibzx v2, r6, r7
+; P9BE-NEXT: vspltb v2, v2, 7
+; P9BE-NEXT: lxsibzx v4, r3, r4
+; P9BE-NEXT: li r6, 0
+; P9BE-NEXT: sldi r6, r6, 56
+; P9BE-NEXT: mtvsrd v3, r6
+; P9BE-NEXT: vmrghb v2, v3, v2
+; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; P9BE-NEXT: vspltb v4, v4, 7
+; P9BE-NEXT: vmrghb v4, v3, v4
+; P9BE-NEXT: vspltb v3, v3, 0
+; P9BE-NEXT: vmrghh v4, v4, v3
+; P9BE-NEXT: xxspltw v3, v3, 0
+; P9BE-NEXT: vmrghw v2, v4, v2
+; P9BE-NEXT: lxvx v4, 0, r3
+; P9BE-NEXT: li r3, 0
+; P9BE-NEXT: vperm v2, v3, v2, v4
+; P9BE-NEXT: xxspltw v3, v2, 1
+; P9BE-NEXT: vadduwm v2, v2, v3
+; P9BE-NEXT: vextuwlx r3, r3, v2
+; P9BE-NEXT: cmpw cr0, r3, r5
+; P9BE-NEXT: bgelr+ cr0
+; P9BE-NEXT: # %bb.1: # %if.then
entry:
%idxprom = sext i32 %delta to i64
%add14 = add nsw i32 %delta, 8
if.end: ; preds = %for.body
ret void
-; CHECK-LABEL: test8:
-; CHECK-NOT: lbzux
-; CHECK: lxsibzx
-; CHECK: lxsibzx
-; P9BE-CHECK-LABEL: test8:
-; P9BE-CHECK-NOT: lbzux
-; P9BE-CHECK: lxsibzx
-; P9BE-CHECK: lxsibzx
}