[SelectionDAG] Improve support for promotion of <1 x fX> floating point argument...

author Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 2 May 2017 10:33:08 +0000 (10:33 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 2 May 2017 10:33:08 +0000 (10:33 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 2 May 2017 10:33:08 +0000 (10:33 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 2 May 2017 10:33:08 +0000 (10:33 +0000)
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h

index f214852a79c4edf54b1427d21bf0a0af539b943f..9e1d148c7ce50f11d1de494329a0ae2faaf669fc 100644 (file)
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -688,6 +688,10 @@ public:
    /// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3>
    SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV);
  
+  /// Convert Op, which must be of floating-point type, to the floating-point
+  /// type VT, by either extending (FP_EXTEND) or rounding (FP_ROUND) it.
+  SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
+
    /// Convert Op, which must be of integer type, to the
    /// integer type VT, by either any-extending or truncating it.
    SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

index 247470b4425d9081c55aba5edc33feb7e74836b4..97a7fab6efd003ac846db98ab1cbd83fa257b4bd 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -523,16 +523,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
    return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
  }
  
-/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,\r
-/// so just return the element, ignoring the index.\r
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {\r
-  EVT VT = N->getValueType(0);\r
-  SDValue Res = GetScalarizedVector(N->getOperand(0));\r
-  if (Res.getValueType() != VT)\r
-    Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);\r
-  return Res;\r
-}\r
-\r
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so return the element (extended to VT if needed), ignoring the index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue Res = GetScalarizedVector(N->getOperand(0));
+  if (Res.getValueType() != VT)
+    Res = VT.isFloatingPoint()
+              ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
+              : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
+  return Res;
+}
  
  /// If the input condition is a vector that needs to be scalarized, it must be
  /// <1 x i1>, so just convert to a normal ISD::SELECT
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index 209abf9234d3c9e761f927acd5d0c3122e7112c0..9d949a2bbfa6e3ae7247e0a90e2699bc3feefa1d 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -959,6 +959,12 @@ void SelectionDAG::clear() {
    DbgInfo->clear();
  }
  
+SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
+  return VT.bitsGT(Op.getValueType())
+             ? getNode(ISD::FP_EXTEND, DL, VT, Op)
+             : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
+}
+
  SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
    return VT.bitsGT(Op.getValueType()) ?
      getNode(ISD::ANY_EXTEND, DL, VT, Op) :
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 866cff1cda57c183afb2644e5ef79a8af1680517..ba9e11798f15ea47074660f0d3fac85a7b648fa1 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -350,7 +350,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
  
    EVT ValueSVT = ValueVT.getVectorElementType();
    if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
-    Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
+                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
  
    return DAG.getBuildVector(ValueVT, DL, Val);
  }
@@ -543,10 +544,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
        Val = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
-      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
  
+    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
      Parts[0] = Val;
      return;
    }
diff --git a/test/CodeGen/NVPTX/f16-instructions.ll b/test/CodeGen/NVPTX/f16-instructions.ll

index 403a67f02f80aae0755d819c64395d02a285dc1f..3d4140820794ff8445455278b3f1a1042c4a8b6d 100644 (file)
--- a/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/test/CodeGen/NVPTX/f16-instructions.ll
@@ -36,6 +36,21 @@ define half @test_fadd(half %a, half %b) #0 {
    ret half %r
  }
  
+; CHECK-LABEL: test_fadd_v1f16(
+; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
+; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
+; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
+  %r = fadd <1 x half> %a, %b
+  ret <1 x half> %r
+}
+
  ; Check that we can lower fadd with immediate arguments.
  ; CHECK-LABEL: test_fadd_imm_0(
  ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
diff --git a/test/CodeGen/X86/pr31088.ll b/test/CodeGen/X86/pr31088.ll

index ca1e08b68000114ae2abb402a73253890dc3912c..0dd8eb0ece85acfc3685cfb5f54cfe8491395001 100644 (file)
--- a/test/CodeGen/X86/pr31088.ll
+++ b/test/CodeGen/X86/pr31088.ll
@@ -3,6 +3,63 @@
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
  
+define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
+; X86-LABEL: ir_fadd_v1f16:
+; X86:       # BB#0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: ir_fadd_v1f16:
+; X64:       # BB#0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
+; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    addss (%rsp), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    popq %rax
+; X64-NEXT:    retq
+;
+; F16C-LABEL: ir_fadd_v1f16:
+; F16C:       # BB#0:
+; F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
+; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
+; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; F16C-NEXT:    retq
+  %retval = fadd <1 x half> %arg0, %arg1
+  ret <1 x half> %retval
+}
+
  define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
  ; X86-LABEL: ir_fadd_v2f16:
  ; X86:       # BB#0:
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 2 May 2017 10:33:08 +0000 (10:33 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 2 May 2017 10:33:08 +0000 (10:33 +0000)
include/llvm/CodeGen/SelectionDAG.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
test/CodeGen/NVPTX/f16-instructions.ll		patch \| blob \| history
test/CodeGen/X86/pr31088.ll		patch \| blob \| history