[SystemZ] Add the CoveredBySubRegs bit to GPR64, GPR128 and FPR128 registers.

author Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Tue, 12 Sep 2017 12:11:29 +0000 (12:11 +0000)

committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Tue, 12 Sep 2017 12:11:29 +0000 (12:11 +0000)
author Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Tue, 12 Sep 2017 12:11:29 +0000 (12:11 +0000)
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Tue, 12 Sep 2017 12:11:29 +0000 (12:11 +0000)
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td

index 52ba1a584017a4f67dc21c4596430f7a364d44e9..a1cfaf699401962d2ac8ef496cb881455bf9f2e5 100644 (file)
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -65,6 +65,7 @@ class GPR64<bits<16> num, string n, GPR32 low, GPR32 high>
   : SystemZRegWithSubregs<n, [low, high]> {
    let HWEncoding = num;
    let SubRegIndices = [subreg_l32, subreg_h32];
+  let CoveredBySubRegs = 1;
  }
  
  // 8 even-odd pairs of GPR64s.
@@ -72,6 +73,7 @@ class GPR128<bits<16> num, string n, GPR64 low, GPR64 high>
   : SystemZRegWithSubregs<n, [low, high]> {
    let HWEncoding = num;
    let SubRegIndices = [subreg_l64, subreg_h64];
+  let CoveredBySubRegs = 1;
  }
  
  // General-purpose registers
@@ -194,6 +196,7 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
   : SystemZRegWithSubregs<n, [low, high]> {
    let HWEncoding = num;
    let SubRegIndices = [subreg_l64, subreg_h64];
+  let CoveredBySubRegs = 1;
  }
  
  // Floating-point registers.  Registers 16-31 require the vector facility.
diff --git a/test/CodeGen/SystemZ/loop-03.ll b/test/CodeGen/SystemZ/loop-03.ll

new file mode 100644 (file)

index 0000000..b95e1ae
--- /dev/null
+++ b/test/CodeGen/SystemZ/loop-03.ll
@@ -0,0 +1,126 @@
+; Test that loops with sufficient registers do not reload or spill on
+; stack. These cases include calls and it is necessary to have the GR128 /
+; FP128 registers part of the callee saved registers list in order to avoid
+; spilling / reloading.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+%0 = type { %0*, %0*, %0*, i32, %1*, i64, i64, i64, i64, i64, i64, %2, %5, %7 }
+%1 = type { i32, i32, i32 (%1*, i64, i32)*, i32 (%1*, i64, i64, i32, i8**)*, i32 (%1*, i64, i64, i64, i32)*, i32 (%1*)*, void (i8*)*, i8*, i8* }
+%2 = type { i64, i64, %3** }
+%3 = type { %4*, i64 }
+%4 = type { i64, i8* }
+%5 = type { i64, i64, %6** }
+%6 = type { i64, %4*, i32, i64, i8* }
+%7 = type { i64, i64, %8** }
+%8 = type { i64, i64*, i64*, %4*, i64, i32*, %5, i32, i64, i64 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+define void @fun0(%0*) {
+; CHECK-LABEL: .LBB0_4
+; CHECK: =>  This Inner Loop Header: Depth=2
+; CHECK-NOT: 16-byte Folded Spill
+; CHECK-NOT: 16-byte Folded Reload
+
+  %2 = load i64, i64* undef, align 8
+  %3 = udiv i64 128, %2
+  %4 = mul i64 %3, %2
+  %5 = load i64, i64* undef, align 8
+  switch i32 undef, label %36 [
+    i32 1, label %6
+    i32 2, label %7
+    i32 3, label %8
+    i32 4, label %9
+    i32 5, label %10
+    i32 6, label %11
+  ]
+
+; <label>:6:                                      ; preds = %1
+  br label %12
+
+; <label>:7:                                      ; preds = %1
+  br label %12
+
+; <label>:8:                                      ; preds = %1
+  unreachable
+
+; <label>:9:                                      ; preds = %1
+  unreachable
+
+; <label>:10:                                     ; preds = %1
+  unreachable
+
+; <label>:11:                                     ; preds = %1
+  unreachable
+
+; <label>:12:                                     ; preds = %7, %6
+  %13 = getelementptr inbounds %0, %0* %0, i64 0, i32 5
+  br label %14
+
+; <label>:14:                                     ; preds = %31, %12
+  %15 = phi i64 [ undef, %31 ], [ %5, %12 ]
+  %16 = phi i64 [ %35, %31 ], [ undef, %12 ]
+  %17 = load i64, i64* %13, align 8
+  %18 = icmp ult i64 %15, %17
+  %19 = select i1 %18, i64 %15, i64 %17
+  %20 = udiv i64 %19, %4
+  %21 = icmp ugt i64 %20, 1
+  %22 = select i1 %21, i64 %20, i64 1
+  %23 = sub i64 %22, 0
+  br label %24
+
+; <label>:24:                                     ; preds = %24, %14
+  %25 = phi i64 [ %23, %14 ], [ %27, %24 ]
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* nonnull undef, i64 %4, i32 1, i1 false) ; call inside the inner loop
+  %26 = getelementptr inbounds i8, i8* null, i64 %4
+  store i8* %26, i8** undef, align 8
+  %27 = add i64 %25, -4
+  %28 = icmp eq i64 %27, 0
+  br i1 %28, label %29, label %24                 ; leave the inner loop through %29, as the preds annotations on %29 and %31 state
+
+; <label>:29:                                     ; preds = %24
+  br i1 undef, label %31, label %30
+
+; <label>:30:                                     ; preds = %29
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %26, i8* nonnull undef, i64 %4, i32 1, i1 false)
+  br label %31
+
+; <label>:31:                                     ; preds = %30, %29
+  %32 = call signext i32 undef(%1* undef, i64 %16, i32 signext 8) ; indirect call in the outer loop
+  %33 = icmp eq i64 undef, 0
+  %34 = select i1 %33, i64 0, i64 %19
+  %35 = add i64 %34, %16
+  br i1 %33, label %36, label %14
+
+; <label>:36:                                     ; preds = %31, %1
+  ret void
+}
+
+declare fp128 @llvm.pow.f128(fp128, fp128)
+
+define void @fun1(fp128*) {
+; CHECK-LABEL: .LBB1_2
+; CHECK: =>This Inner Loop Header: Depth=1
+; CHECK-NOT: 16-byte Folded Spill
+; CHECK-NOT: 16-byte Folded Reload
+; CHECK-LABEL: .LBB1_3
+
+  br i1 undef, label %7, label %2                 ; either skip the loop entirely or enter it
+
+; <label>:2:                                      ; preds = %2, %1
+  %3 = phi fp128 [ %5, %2 ], [ 0xL00000000000000000000000000000000, %1 ]
+  %4 = tail call fp128 @llvm.pow.f128(fp128 0xL00000000000000000000000000000000, fp128 0xL00000000000000000000000000000000) #2 ; call inside the loop body
+  %5 = fadd fp128 %3, %4                          ; fp128 sum carried around the loop via the phi %3
+  %6 = icmp eq i64 undef, 0
+  br i1 %6, label %7, label %2                    ; back edge to %2: the loop is this single block
+
+; <label>:7:                                      ; preds = %2, %1
+  %8 = phi fp128 [ 0xL00000000000000000000000000000000, %1 ], [ %5, %2 ]
+  %9 = fadd fp128 0xL00000000000000000000000000000000, %8
+  %10 = fadd fp128 0xL00000000000000000000000000000000, %9
+  %11 = fadd fp128 0xL00000000000000000000000000000000, %10
+  %12 = tail call fp128 @llvm.pow.f128(fp128 %11, fp128 0xL00000000000000000000000000000000) #2
+  store fp128 %12, fp128* %0, align 8             ; write the result through the pointer argument
+  ret void
+}
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Tue, 12 Sep 2017 12:11:29 +0000 (12:11 +0000)
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Tue, 12 Sep 2017 12:11:29 +0000 (12:11 +0000)
lib/Target/SystemZ/SystemZRegisterInfo.td		patch \| blob \| history
test/CodeGen/SystemZ/loop-03.ll	[new file with mode: 0644]	patch \| blob