CodeGen: Power: Add lowering for shifts of v1i128.

author Kyle Butt <kyle+llvm@iteratee.net>

Wed, 17 May 2017 21:54:41 +0000 (21:54 +0000)

committer Kyle Butt <kyle+llvm@iteratee.net>

Wed, 17 May 2017 21:54:41 +0000 (21:54 +0000)
author Kyle Butt <kyle+llvm@iteratee.net>
Wed, 17 May 2017 21:54:41 +0000 (21:54 +0000)
committer Kyle Butt <kyle+llvm@iteratee.net>
Wed, 17 May 2017 21:54:41 +0000 (21:54 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 144aea85083370b6fc87aeddcf5d7c7ab5b18b36..e65b1f1aa0a552ad9b7d681d840e559cb2d3ab87 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -689,6 +689,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
          setOperationAction(ISD::SRA, MVT::v2i64, Legal);
          setOperationAction(ISD::SRL, MVT::v2i64, Legal);
  
+        // 128 bit shifts can be accomplished via 3 instructions for SHL and
+        // SRL, but not for SRA because of the instructions available:
+        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
+        // worth doing here, so all three shifts are expanded.
+        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
+        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
+        setOperationAction(ISD::SRA, MVT::v1i128, Expand);
+
          setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
        }
        else {
@@ -742,6 +750,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      if (Subtarget.hasP9Vector()) {
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+      // 128 bit shifts can be accomplished via 3 instructions for SHL and
+      // SRL, but not for SRA because of the instructions available:
+      // VS{RL} and VS{RL}O.
+      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
+      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
+      setOperationAction(ISD::SRA, MVT::v1i128, Expand);
      }
    }
  
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td

index e14d18fd54331bd2a3ccf96a1c22be43ca21a643..5465b5f2d66cd36d98b74490402ac9a6553fbc05 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -987,12 +987,16 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
            (v8i16 (VSLH $vA, $vB))>;
  def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
            (v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
  def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
            (v16i8 (VSLB $vA, $vB))>;
  def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
            (v8i16 (VSLH $vA, $vB))>;
  def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
            (v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
  
  def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
            (v16i8 (VSRB $vA, $vB))>;
@@ -1000,12 +1004,16 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
            (v8i16 (VSRH $vA, $vB))>;
  def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
            (v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
  def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
            (v16i8 (VSRB $vA, $vB))>;
  def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
            (v8i16 (VSRH $vA, $vB))>;
  def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
            (v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)),
+          (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
  
  def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
            (v16i8 (VSRAB $vA, $vB))>;
diff --git a/test/CodeGen/PowerPC/shift128.ll b/test/CodeGen/PowerPC/shift128.ll

index 17a380c71c3518386ae797416e5c3569c7c8321e..48e1b96f838b6835b5b85f9bbb4afcfaa765e01b 100644 (file)
--- a/test/CodeGen/PowerPC/shift128.ll
+++ b/test/CodeGen/PowerPC/shift128.ll
@@ -1,14 +1,98 @@
-; RUN: llc -verify-machineinstrs < %s -march=ppc64 | grep sld | count 5
+; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefix=P8 --check-prefix=CHECK %s
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs < %s | FileCheck --check-prefix=P9 --check-prefix=CHECK %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
  
-define i128 @foo_lshr(i128 %x, i128 %y) {
+; CHECK-LABEL: lshr:
+; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
+; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
+; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
+; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
+; CHECK-DAG: srd [[R4:[0-9]+]], 4, [[R1]]
+; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
+; CHECK-DAG: or 3, [[R5]], [[R4]]
+; CHECK-DAG: srd 4, 4, 5
+; CHECK: blr
+define i128 @lshr(i128 %x, i128 %y) {
    %r = lshr i128 %x, %y
    ret i128 %r
  }
-define i128 @foo_ashr(i128 %x, i128 %y) {
+; CHECK-LABEL: ashr:
+; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
+; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
+; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
+; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
+; CHECK-DAG: srad [[R4:[0-9]+]], 4, [[R1]]
+; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
+; CHECK-DAG: cmpwi [[R1]], 1
+; CHECK-DAG: srad 4, 4, 5
+; CHECK: isel 3, [[R5]], [[R4]], 0
+; CHECK: blr
+define i128 @ashr(i128 %x, i128 %y) {
    %r = ashr i128 %x, %y
    ret i128 %r
  }
-define i128 @foo_shl(i128 %x, i128 %y) {
+; CHECK-LABEL: shl:
+; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
+; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
+; CHECK-DAG: sld [[R2:[0-9]+]], 4, 5
+; CHECK-DAG: srd [[R3:[0-9]+]], 3, [[R0]]
+; CHECK-DAG: sld [[R4:[0-9]+]], 3, [[R1]]
+; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
+; CHECK-DAG: or 4, [[R5]], [[R4]]
+; CHECK-DAG: sld 3, 3, 5
+; CHECK: blr
+define i128 @shl(i128 %x, i128 %y) {
    %r = shl i128 %x, %y
    ret i128 %r
  }
+
+; CHECK-LABEL: shl_v1i128:
+; P8-NOT: {{\b}}vslo
+; P8-NOT: {{\b}}vsl
+; P9-DAG: vslo
+; P9-DAG: vspltb
+; P9: vsl
+; P9-NOT: {{\b}}sld
+; P9-NOT: {{\b}}srd
+; CHECK: blr
+define i128 @shl_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i128> undef, i128 %arg, i32 0
+  %1 = insertelement <1 x i128> undef, i128 %amt, i32 0
+  %2 = shl <1 x i128> %0, %1
+  %retval = extractelement <1 x i128> %2, i32 0
+  ret i128 %retval
+}
+
+; CHECK-LABEL: lshr_v1i128:
+; P8-NOT: {{\b}}vsro
+; P8-NOT: {{\b}}vsr
+; P9-DAG: vsro
+; P9-DAG: vspltb
+; P9: vsr
+; P9-NOT: {{\b}}srd
+; P9-NOT: {{\b}}sld
+; CHECK: blr
+define i128 @lshr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i128> undef, i128 %arg, i32 0
+  %1 = insertelement <1 x i128> undef, i128 %amt, i32 0
+  %2 = lshr <1 x i128> %0, %1
+  %retval = extractelement <1 x i128> %2, i32 0
+  ret i128 %retval
+}
+
+; Arithmetic shift right is not available as an operation on the vector registers.
+; CHECK-LABEL: ashr_v1i128:
+; CHECK-NOT: {{\b}}vsro
+; CHECK-NOT: {{\b}}vsr
+; CHECK: blr
+define i128 @ashr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i128> undef, i128 %arg, i32 0
+  %1 = insertelement <1 x i128> undef, i128 %amt, i32 0
+  %2 = ashr <1 x i128> %0, %1
+  %retval = extractelement <1 x i128> %2, i32 0
+  ret i128 %retval
+}
author	Kyle Butt <kyle+llvm@iteratee.net>
	Wed, 17 May 2017 21:54:41 +0000 (21:54 +0000)
committer	Kyle Butt <kyle+llvm@iteratee.net>
	Wed, 17 May 2017 21:54:41 +0000 (21:54 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCInstrAltivec.td		patch \| blob \| history
test/CodeGen/PowerPC/shift128.ll		patch \| blob \| history