From f1a319a05f66af71eb4efc3a0d047fc11bf7400c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 29 Nov 2017 22:15:43 +0000 Subject: [PATCH] [SelectionDAG][X86] Teach promotion legalization for fp_to_sint/fp_to_uint to insert an assertsext/assertzext based on the original type If we put in an assertsext/zext here, we're able to generate better truncate code using pack on pre-avx512 targets. Similar is already done during type legalization. This is the equivalent for op legalization Differential Revision: https://reviews.llvm.org/D40591 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319368 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeVectorOps.cpp | 14 ++++++++++--- test/CodeGen/X86/avx-cvt-2.ll | 20 ++++--------------- test/CodeGen/X86/vec_cast2.ll | 5 +---- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 928db8280bb..74970ab5792 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -512,9 +512,17 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { } } - SDLoc loc(Op); - SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); + SDLoc dl(Op); + SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext + : ISD::AssertSext, + dl, NewVT, Promoted, + DAG.getValueType(VT.getScalarType())); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); } SDValue VectorLegalizer::ExpandLoad(SDValue Op) { diff --git a/test/CodeGen/X86/avx-cvt-2.ll b/test/CodeGen/X86/avx-cvt-2.ll index c955756811f..f38127960bf 100644 --- a/test/CodeGen/X86/avx-cvt-2.ll +++ b/test/CodeGen/X86/avx-cvt-2.ll @@ -12,10 +12,7 @@ define void @fptoui16(%f32vec_t %a, %i16vec_t *%p) { ; CHECK: # BB#0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vmovdqa %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -29,10 +26,7 @@ define void @fptosi16(%f32vec_t %a, %i16vec_t *%p) { ; CHECK: # BB#0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vmovdqa %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -46,10 +40,7 @@ define void @fptoui8(%f32vec_t %a, %i8vec_t *%p) { ; CHECK: # BB#0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmovq %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper @@ -64,10 +55,7 @@ define void @fptosi8(%f32vec_t %a, %i8vec_t *%p) { ; CHECK: # BB#0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmovq %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll index c03b330b88e..723b0ca53fc 100644 --- a/test/CodeGen/X86/vec_cast2.ll +++ b/test/CodeGen/X86/vec_cast2.ll @@ -88,10 +88,7 @@ define <8 x i8> @foo3_8(<8 x float> %src) { ; CHECK: ## BB#0: ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retl ; -- 2.50.1