; registers and with -fast-isel-abort=1 turned on the test case will then fail.
; When fastisel better supports VSX fix up this test case.
;
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s --check-prefix=ELF64LE
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s
; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 -mattr=-vsx | FileCheck %s --check-prefix=PPC970
;; Tests for 970 don't use -fast-isel-abort=1 because we intentionally punt
define void @sitofp_single_i64(i64 %a, float %b) nounwind {
entry:
-; ELF64: sitofp_single_i64
-; ELF64LE: sitofp_single_i64
+; CHECK: sitofp_single_i64
; PPC970: sitofp_single_i64
%b.addr = alloca float, align 4
%conv = sitofp i64 %a to float
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfids
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfids
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
define void @sitofp_single_i32(i32 %a, float %b) nounwind {
entry:
-; ELF64: sitofp_single_i32
-; ELF64LE: sitofp_single_i32
+; CHECK: sitofp_single_i32
; PPC970: sitofp_single_i32
%b.addr = alloca float, align 4
%conv = sitofp i32 %a to float
-; ELF64: std
-; stack offset used to load the float: 65524 = -16 + 4
-; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
-; ELF64: lfiwax
-; ELF64: fcfids
-; ELF64LE: std
-; stack offset used to load the float: 65520 = -16 + 0
-; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
-; ELF64LE: lfiwax
-; ELF64LE: fcfids
+; CHECK: std
+; CHECK-NEXT: li
+; CHECK-NEXT: lfiwax
+; CHECK-NEXT: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
define void @sitofp_single_i16(i16 %a, float %b) nounwind {
entry:
-; ELF64: sitofp_single_i16
-; ELF64LE: sitofp_single_i16
+; CHECK: sitofp_single_i16
; PPC970: sitofp_single_i16
%b.addr = alloca float, align 4
%conv = sitofp i16 %a to float
-; ELF64: extsh
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfids
-; ELF64LE: extsh
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfids
+; CHECK: extsh
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfids
; PPC970: extsh
; PPC970: std
; PPC970: lfd
define void @sitofp_single_i8(i8 %a) nounwind {
entry:
-; ELF64: sitofp_single_i8
-; ELF64LE: sitofp_single_i8
+; CHECK: sitofp_single_i8
; PPC970: sitofp_single_i8
%b.addr = alloca float, align 4
%conv = sitofp i8 %a to float
-; ELF64: extsb
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfids
-; ELF64LE: extsb
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfids
+; CHECK: extsb
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfids
; PPC970: extsb
; PPC970: std
; PPC970: lfd
define void @sitofp_double_i32(i32 %a, double %b) nounwind {
entry:
-; ELF64: sitofp_double_i32
-; ELF64LE: sitofp_double_i32
+; CHECK: sitofp_double_i32
; PPC970: sitofp_double_i32
%b.addr = alloca double, align 8
%conv = sitofp i32 %a to double
-; ELF64: std
-; stack offset used to load the float: 65524 = -16 + 4
-; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
-; ELF64: lfiwax
-; ELF64: fcfid
-; ELF64LE: std
-; stack offset used to load the float: 65520 = -16 + 0
-; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
-; ELF64LE: lfiwax
-; ELF64LE: fcfid
+; CHECK: std
+; CHECK-NOT: ori
+; CHECK: li
+; CHECK-NOT: ori
+; CHECK: lfiwax
+; CHECK: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
define void @sitofp_double_i64(i64 %a, double %b) nounwind {
entry:
-; ELF64: sitofp_double_i64
-; ELF64LE: sitofp_double_i64
+; CHECK: sitofp_double_i64
; PPC970: sitofp_double_i64
%b.addr = alloca double, align 8
%conv = sitofp i64 %a to double
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfid
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfid
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
define void @sitofp_double_i16(i16 %a, double %b) nounwind {
entry:
-; ELF64: sitofp_double_i16
-; ELF64LE: sitofp_double_i16
+; CHECK: sitofp_double_i16
; PPC970: sitofp_double_i16
%b.addr = alloca double, align 8
%conv = sitofp i16 %a to double
-; ELF64: extsh
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfid
-; ELF64LE: extsh
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfid
+; CHECK: extsh
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfid
; PPC970: extsh
; PPC970: std
; PPC970: lfd
define void @sitofp_double_i8(i8 %a, double %b) nounwind {
entry:
-; ELF64: sitofp_double_i8
-; ELF64LE: sitofp_double_i8
+; CHECK: sitofp_double_i8
; PPC970: sitofp_double_i8
%b.addr = alloca double, align 8
%conv = sitofp i8 %a to double
-; ELF64: extsb
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfid
-; ELF64LE: extsb
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfid
+; CHECK: extsb
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfid
; PPC970: extsb
; PPC970: std
; PPC970: lfd
define void @uitofp_single_i64(i64 %a, float %b) nounwind {
entry:
-; ELF64: uitofp_single_i64
-; ELF64LE: uitofp_single_i64
+; CHECK: uitofp_single_i64
; PPC970: uitofp_single_i64
%b.addr = alloca float, align 4
%conv = uitofp i64 %a to float
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfidus
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfidus
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfidus
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
ret void
define void @uitofp_single_i32(i32 %a, float %b) nounwind {
entry:
-; ELF64: uitofp_single_i32
-; ELF64LE: uitofp_single_i32
+; CHECK: uitofp_single_i32
; PPC970: uitofp_single_i32
%b.addr = alloca float, align 4
%conv = uitofp i32 %a to float
-; ELF64: std
-; stack offset used to load the float: 65524 = -16 + 4
-; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
-; ELF64: lfiwzx
-; ELF64: fcfidus
-; ELF64LE: std
-; stack offset used to load the float: 65520 = -16 + 0
-; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
-; ELF64LE: lfiwzx
-; ELF64LE: fcfidus
+; CHECK: std
+; CHECK-NOT: ori
+; CHECK: li
+; CHECK-NOT: ori
+; CHECK: lfiwzx
+; CHECK: fcfidus
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
define void @uitofp_single_i16(i16 %a, float %b) nounwind {
entry:
-; ELF64: uitofp_single_i16
-; ELF64LE: uitofp_single_i16
+; CHECK: uitofp_single_i16
; PPC970: uitofp_single_i16
%b.addr = alloca float, align 4
%conv = uitofp i16 %a to float
-; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfidus
-; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfidus
+; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfidus
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
; PPC970: std
; PPC970: lfd
define void @uitofp_single_i8(i8 %a) nounwind {
entry:
-; ELF64: uitofp_single_i8
-; ELF64LE: uitofp_single_i8
+; CHECK: uitofp_single_i8
; PPC970: uitofp_single_i8
%b.addr = alloca float, align 4
%conv = uitofp i8 %a to float
-; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfidus
-; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfidus
+; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfidus
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
; PPC970: std
; PPC970: lfd
define void @uitofp_double_i64(i64 %a, double %b) nounwind {
entry:
-; ELF64: uitofp_double_i64
-; ELF64LE: uitofp_double_i64
+; CHECK: uitofp_double_i64
; PPC970: uitofp_double_i64
%b.addr = alloca double, align 8
%conv = uitofp i64 %a to double
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfidu
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfidu
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfidu
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
ret void
define void @uitofp_double_i32(i32 %a, double %b) nounwind {
entry:
-; ELF64: uitofp_double_i32
-; ELF64LE: uitofp_double_i32
+; CHECK: uitofp_double_i32
; PPC970: uitofp_double_i32
%b.addr = alloca double, align 8
%conv = uitofp i32 %a to double
-; ELF64: std
-; stack offset used to load the float: 65524 = -16 + 4
-; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
-; ELF64: lfiwzx
-; ELF64: fcfidu
-; ELF64LE: std
-; stack offset used to load the float: 65520 = -16 + 0
-; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
-; ELF64LE: lfiwzx
-; ELF64LE: fcfidu
+; CHECK: std
+; CHECK-NEXT: li
+; CHECK-NEXT: lfiwzx
+; CHECK-NEXT: fcfidu
+; CHECKLE: fcfidu
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
define void @uitofp_double_i16(i16 %a, double %b) nounwind {
entry:
-; ELF64: uitofp_double_i16
-; ELF64LE: uitofp_double_i16
+; CHECK: uitofp_double_i16
; PPC970: uitofp_double_i16
%b.addr = alloca double, align 8
%conv = uitofp i16 %a to double
-; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfidu
-; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfidu
+; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfidu
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
; PPC970: std
; PPC970: lfd
define void @uitofp_double_i8(i8 %a, double %b) nounwind {
entry:
-; ELF64: uitofp_double_i8
-; ELF64LE: uitofp_double_i8
+; CHECK: uitofp_double_i8
; PPC970: uitofp_double_i8
%b.addr = alloca double, align 8
%conv = uitofp i8 %a to double
-; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
-; ELF64: std
-; ELF64: lfd
-; ELF64: fcfidu
-; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
-; ELF64LE: std
-; ELF64LE: lfd
-; ELF64LE: fcfidu
+; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
+; CHECK: std
+; CHECK: lfd
+; CHECK: fcfidu
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
; PPC970: std
; PPC970: lfd
define void @fptosi_float_i32(float %a) nounwind {
entry:
-; ELF64: fptosi_float_i32
-; ELF64LE: fptosi_float_i32
+; CHECK: fptosi_float_i32
; PPC970: fptosi_float_i32
%b.addr = alloca i32, align 4
%conv = fptosi float %a to i32
-; ELF64: fctiwz
-; ELF64: stfd
-; ELF64: lwa
-; ELF64LE: fctiwz
-; ELF64LE: stfd
-; ELF64LE: lwa
+; CHECK: fctiwz
+; CHECK: stfd
+; CHECK: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
define void @fptosi_float_i64(float %a) nounwind {
entry:
-; ELF64: fptosi_float_i64
-; ELF64LE: fptosi_float_i64
+; CHECK: fptosi_float_i64
; PPC970: fptosi_float_i64
%b.addr = alloca i64, align 4
%conv = fptosi float %a to i64
-; ELF64: fctidz
-; ELF64: stfd
-; ELF64: ld
-; ELF64LE: fctidz
-; ELF64LE: stfd
-; ELF64LE: ld
+; CHECK: fctidz
+; CHECK: stfd
+; CHECK: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
define void @fptosi_double_i32(double %a) nounwind {
entry:
-; ELF64: fptosi_double_i32
-; ELF64LE: fptosi_double_i32
+; CHECK: fptosi_double_i32
; PPC970: fptosi_double_i32
%b.addr = alloca i32, align 8
%conv = fptosi double %a to i32
-; ELF64: fctiwz
-; ELF64: stfd
-; ELF64: lwa
-; ELF64LE: fctiwz
-; ELF64LE: stfd
-; ELF64LE: lwa
+; CHECK: fctiwz
+; CHECK: stfd
+; CHECK: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
define void @fptosi_double_i64(double %a) nounwind {
entry:
-; ELF64: fptosi_double_i64
-; ELF64LE: fptosi_double_i64
+; CHECK: fptosi_double_i64
; PPC970: fptosi_double_i64
%b.addr = alloca i64, align 8
%conv = fptosi double %a to i64
-; ELF64: fctidz
-; ELF64: stfd
-; ELF64: ld
-; ELF64LE: fctidz
-; ELF64LE: stfd
-; ELF64LE: ld
+; CHECK: fctidz
+; CHECK: stfd
+; CHECK: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
define void @fptoui_float_i32(float %a) nounwind {
entry:
-; ELF64: fptoui_float_i32
-; ELF64LE: fptoui_float_i32
+; CHECK: fptoui_float_i32
; PPC970: fptoui_float_i32
%b.addr = alloca i32, align 4
%conv = fptoui float %a to i32
-; ELF64: fctiwuz
-; ELF64: stfd
-; ELF64: lwz
-; ELF64LE: fctiwuz
-; ELF64LE: stfd
-; ELF64LE: lwz
+; CHECK: fctiwuz
+; CHECK: stfd
+; CHECK: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
define void @fptoui_float_i64(float %a) nounwind {
entry:
-; ELF64: fptoui_float_i64
-; ELF64LE: fptoui_float_i64
+; CHECK: fptoui_float_i64
; PPC970: fptoui_float_i64
%b.addr = alloca i64, align 4
%conv = fptoui float %a to i64
-; ELF64: fctiduz
-; ELF64: stfd
-; ELF64: ld
-; ELF64LE: fctiduz
-; ELF64LE: stfd
-; ELF64LE: ld
+; CHECK: fctiduz
+; CHECK: stfd
+; CHECK: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 4
ret void
define void @fptoui_double_i32(double %a) nounwind {
entry:
-; ELF64: fptoui_double_i32
-; ELF64LE: fptoui_double_i32
+; CHECK: fptoui_double_i32
; PPC970: fptoui_double_i32
%b.addr = alloca i32, align 8
%conv = fptoui double %a to i32
-; ELF64: fctiwuz
-; ELF64: stfd
-; ELF64: lwz
-; ELF64LE: fctiwuz
-; ELF64LE: stfd
-; ELF64LE: lwz
+; CHECK: fctiwuz
+; CHECK: stfd
+; CHECK: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
define void @fptoui_double_i64(double %a) nounwind {
entry:
-; ELF64: fptoui_double_i64
-; ELF64LE: fptoui_double_i64
+; CHECK: fptoui_double_i64
; PPC970: fptoui_double_i64
%b.addr = alloca i64, align 8
%conv = fptoui double %a to i64
-; ELF64: fctiduz
-; ELF64: stfd
-; ELF64: ld
-; ELF64LE: fctiduz
-; ELF64LE: stfd
-; ELF64LE: ld
+; CHECK: fctiduz
+; CHECK: stfd
+; CHECK: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 8
ret void
--- /dev/null
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-PWR9 %s
+
+define <4 x i32> @testSpill(<4 x i32> %a, <4 x i32> %b) {
+
+; CHECK-LABEL: testSpill:
+; CHECK: li 11, 80
+; CHECK: li 12, 96
+; CHECK: li 3, 48
+; CHECK: li 10, 64
+; CHECK: stxvd2x 62, 1, 11 # 16-byte Folded Spill
+; CHECK: stxvd2x 63, 1, 12 # 16-byte Folded Spill
+; CHECK: stxvd2x 60, 1, 3 # 16-byte Folded Spill
+; CHECK: stxvd2x 61, 1, 10 # 16-byte Folded Spill
+; CHECK: li 9, 96
+; CHECK: li 10, 80
+; CHECK: li 11, 64
+; CHECK: li 12, 48
+; CHECK: lxvd2x 63, 1, 9 # 16-byte Folded Reload
+; CHECK: lxvd2x 62, 1, 10 # 16-byte Folded Reload
+; CHECK: lxvd2x 61, 1, 11 # 16-byte Folded Reload
+; CHECK: lxvd2x 60, 1, 12 # 16-byte Folded Reload
+; CHECK: mtlr 0
+; CHECK-NEXT: blr
+;
+; CHECK-PWR9-LABEL: testSpill:
+; CHECK-PWR9: stxv 62, 80(1) # 16-byte Folded Spill
+; CHECK-PWR9: stxv 63, 96(1) # 16-byte Folded Spill
+; CHECK-PWR9: stxv 60, 48(1) # 16-byte Folded Spill
+; CHECK-PWR9: stxv 61, 64(1) # 16-byte Folded Spill
+; CHECK-PWR9: lxv 63, 96(1) # 16-byte Folded Reload
+; CHECK-PWR9: lxv 62, 80(1) # 16-byte Folded Reload
+; CHECK-PWR9: lxv 61, 64(1) # 16-byte Folded Reload
+; CHECK-PWR9: lxv 60, 48(1) # 16-byte Folded Reload
+; CHECK-PWR9: mtlr 0
+; CHECK-PWR9-NEXT: blr
+
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %call = tail call <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b)
+ br label %if.end
+
+if.else: ; preds = %entry
+ %call1 = tail call <4 x i32> @test2(<4 x i32> %b, <4 x i32> %a)
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %c.0.in = phi <4 x i32> [ %call, %if.then ], [ %call1, %if.else ]
+ %call3 = tail call <4 x i32> @test1(<4 x i32> %b, <4 x i32> %a)
+ %call5 = tail call <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b)
+ %add4 = add <4 x i32> %a, <i32 0, i32 0, i32 2, i32 2>
+ %add6 = add <4 x i32> %add4, %c.0.in
+ %c.0 = add <4 x i32> %add6, %call3
+ %add7 = add <4 x i32> %c.0, %call5
+ ret <4 x i32> %add7
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpgtsw.p(i32, <4 x i32>, <4 x i32>)
+declare <4 x i32> @test1(<4 x i32>, <4 x i32>)
+declare <4 x i32> @test2(<4 x i32>, <4 x i32>)