From 0fa88773d572638c348463ef4ca959560d2be917 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 27 Jan 2017 07:46:26 +0000 Subject: [PATCH] [DAGTypeLegalizer] Handle SIGN/ZERO_EXTEND in WidenVecRes_Convert(). In case of a SIGN/ZERO_EXTEND of an incomplete vector type (using only a partial number of available vector elements), WidenVecRes_Convert() used to resort to scalarization. This patch adds a handling of the (common) case where an input vector can be found of same width as the widened result vector, by converting the node to SIGN/ZERO_EXTEND_VECTOR_INREG. Review: Eli Friedman git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293268 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 9 ++ test/CodeGen/SystemZ/vec-sext.ll | 91 +++++++++++++++++++ test/CodeGen/SystemZ/vec-zext.ll | 91 +++++++++++++++++++ 3 files changed, 191 insertions(+) create mode 100644 test/CodeGen/SystemZ/vec-sext.ll create mode 100644 test/CodeGen/SystemZ/vec-zext.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 74740d1641a..39aba8979b1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2326,6 +2326,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, DL, WidenVT, InOp); return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } + if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { + // If both input and result vector types are of same width, extend + // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which + // accepts fewer elements in the result than in the input. 
+ if (Opcode == ISD::SIGN_EXTEND) + return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); + if (Opcode == ISD::ZERO_EXTEND) + return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); + } } if (TLI.isTypeLegal(InWidenVT)) { diff --git a/test/CodeGen/SystemZ/vec-sext.ll b/test/CodeGen/SystemZ/vec-sext.ll new file mode 100644 index 00000000000..9831de52ee8 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-sext.ll @@ -0,0 +1,91 @@ +; Test that vector sexts are done efficiently with unpack instructions also in +; case of fewer elements than allowed, e.g. <2 x i32>. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + +define <2 x i16> @fun1(<2 x i8> %val1) { +; CHECK-LABEL: fun1: +; CHECK: vuphb %v24, %v24 +; CHECK-NEXT: br %r14 + %z = sext <2 x i8> %val1 to <2 x i16> + ret <2 x i16> %z +} + +define <2 x i32> @fun2(<2 x i8> %val1) { +; CHECK-LABEL: fun2: +; CHECK: vuphb %v0, %v24 +; CHECK-NEXT: vuphh %v24, %v0 +; CHECK-NEXT: br %r14 + %z = sext <2 x i8> %val1 to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i64> @fun3(<2 x i8> %val1) { +; CHECK-LABEL: fun3: +; CHECK: vuphb %v0, %v24 +; CHECK-NEXT: vuphh %v0, %v0 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 + %z = sext <2 x i8> %val1 to <2 x i64> + ret <2 x i64> %z +} + +define <2 x i32> @fun4(<2 x i16> %val1) { +; CHECK-LABEL: fun4: +; CHECK: vuphh %v24, %v24 +; CHECK-NEXT: br %r14 + %z = sext <2 x i16> %val1 to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i64> @fun5(<2 x i16> %val1) { +; CHECK-LABEL: fun5: +; CHECK: vuphh %v0, %v24 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 + %z = sext <2 x i16> %val1 to <2 x i64> + ret <2 x i64> %z +} + +define <2 x i64> @fun6(<2 x i32> %val1) { +; CHECK-LABEL: fun6: +; CHECK: vuphf %v24, %v24 +; CHECK-NEXT: br %r14 + %z = sext <2 x i32> %val1 to <2 x i64> + ret <2 x i64> %z +} + +define <4 x i16> @fun7(<4 x i8> %val1) { +; CHECK-LABEL: fun7: +; CHECK: vuphb %v24, %v24 +; CHECK-NEXT: br %r14 + %z = sext <4 x i8> %val1 to <4 x i16> + ret <4 x i16> 
%z +} + +define <4 x i32> @fun8(<4 x i8> %val1) { +; CHECK-LABEL: fun8: +; CHECK: vuphb %v0, %v24 +; CHECK-NEXT: vuphh %v24, %v0 +; CHECK-NEXT: br %r14 + %z = sext <4 x i8> %val1 to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i32> @fun9(<4 x i16> %val1) { +; CHECK-LABEL: fun9: +; CHECK: vuphh %v24, %v24 +; CHECK-NEXT: br %r14 + %z = sext <4 x i16> %val1 to <4 x i32> + ret <4 x i32> %z +} + +define <8 x i16> @fun10(<8 x i8> %val1) { +; CHECK-LABEL: fun10: +; CHECK: vuphb %v24, %v24 +; CHECK-NEXT: br %r14 + %z = sext <8 x i8> %val1 to <8 x i16> + ret <8 x i16> %z +} + diff --git a/test/CodeGen/SystemZ/vec-zext.ll b/test/CodeGen/SystemZ/vec-zext.ll new file mode 100644 index 00000000000..831594d4020 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-zext.ll @@ -0,0 +1,91 @@ +; Test that vector zexts are done efficiently with unpack instructions also in +; case of fewer elements than allowed, e.g. <2 x i32>. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + +define <2 x i16> @fun1(<2 x i8> %val1) { +; CHECK-LABEL: fun1: +; CHECK: vuplhb %v24, %v24 +; CHECK-NEXT: br %r14 + %z = zext <2 x i8> %val1 to <2 x i16> + ret <2 x i16> %z +} + +define <2 x i32> @fun2(<2 x i8> %val1) { +; CHECK-LABEL: fun2: +; CHECK: vuplhb %v0, %v24 +; CHECK-NEXT: vuplhh %v24, %v0 +; CHECK-NEXT: br %r14 + %z = zext <2 x i8> %val1 to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i64> @fun3(<2 x i8> %val1) { +; CHECK-LABEL: fun3: +; CHECK: vuplhb %v0, %v24 +; CHECK-NEXT: vuplhh %v0, %v0 +; CHECK-NEXT: vuplhf %v24, %v0 +; CHECK-NEXT: br %r14 + %z = zext <2 x i8> %val1 to <2 x i64> + ret <2 x i64> %z +} + +define <2 x i32> @fun4(<2 x i16> %val1) { +; CHECK-LABEL: fun4: +; CHECK: vuplhh %v24, %v24 +; CHECK-NEXT: br %r14 + %z = zext <2 x i16> %val1 to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i64> @fun5(<2 x i16> %val1) { +; CHECK-LABEL: fun5: +; CHECK: vuplhh %v0, %v24 +; CHECK-NEXT: vuplhf %v24, %v0 +; CHECK-NEXT: br %r14 + %z = zext <2 x i16> %val1 to <2 x 
i64> %z +} + +define <2 x i64> @fun6(<2 x i32> %val1) { +; CHECK-LABEL: fun6: +; CHECK: vuplhf %v24, %v24 +; CHECK-NEXT: br %r14 + %z = zext <2 x i32> %val1 to <2 x i64> + ret <2 x i64> %z +} + +define <4 x i16> @fun7(<4 x i8> %val1) { +; CHECK-LABEL: fun7: +; CHECK: vuplhb %v24, %v24 +; CHECK-NEXT: br %r14 + %z = zext <4 x i8> %val1 to <4 x i16> + ret <4 x i16> %z +} + +define <4 x i32> @fun8(<4 x i8> %val1) { +; CHECK-LABEL: fun8: +; CHECK: vuplhb %v0, %v24 +; CHECK-NEXT: vuplhh %v24, %v0 +; CHECK-NEXT: br %r14 + %z = zext <4 x i8> %val1 to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i32> @fun9(<4 x i16> %val1) { +; CHECK-LABEL: fun9: +; CHECK: vuplhh %v24, %v24 +; CHECK-NEXT: br %r14 + %z = zext <4 x i16> %val1 to <4 x i32> + ret <4 x i32> %z +} + +define <8 x i16> @fun10(<8 x i8> %val1) { +; CHECK-LABEL: fun10: +; CHECK: vuplhb %v24, %v24 +; CHECK-NEXT: br %r14 + %z = zext <8 x i8> %val1 to <8 x i16> + ret <8 x i16> %z +} + -- 2.50.1