From 6ed7489c1e7b9aaaa19d3287e87d30be30ce9d83 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Jan 2019 17:58:30 +0000 Subject: [PATCH] [LegalizeIntegerTypes] When promoting the result of an extract_vector_elt also promote the input type if necessary By also promoting the input type we get a better idea of what scalar type to use. This can provide better results if the result of the extract is sign extended. What was previously happening is that the extract result would be legalized, and sometime later the input of the sign extend would be legalized using the result of the extract. Then later the extract input would be legalized forcing a truncate into the input of the sign extend using a replace all uses. This requires DAG combine to combine out the sext/truncate pair. But sometimes we visited the truncate first and messed things up before the sext could be combined. By creating the extract with the correct scalar type when we legalize the result type, the truncate will be added right away. Then when the sign_extend input is legalized it will create an any_extend of the truncate which can be optimized by getNode to maybe remove the truncate. And then a sign_extend_inreg. Now DAG combine doesn't have to worry about getting rid of the extend. This fixes the regression on X86 in D56156. 
Differential Revision: https://reviews.llvm.org/D56176 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350236 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 22 ++++++- test/CodeGen/X86/setcc-combine.ll | 63 ++++++++----------- 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a42e89da994..2be8ac20238 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -441,8 +441,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), - N->getOperand(1)); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // If the input also needs to be promoted, do that first so we can get a + // get a good idea for the output type. + if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue In = GetPromotedInteger(Op0); + + // If the new type is larger than NVT, use it. We probably won't need to + // promote it again. 
+ EVT SVT = In.getValueType().getScalarType(); + if (SVT.bitsGE(NVT)) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1); + return DAG.getAnyExtOrTrunc(Ext, dl, NVT); + } + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1); } SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { diff --git a/test/CodeGen/X86/setcc-combine.ll b/test/CodeGen/X86/setcc-combine.ll index da29c3b9a63..e8ebce9e56a 100644 --- a/test/CodeGen/X86/setcc-combine.ll +++ b/test/CodeGen/X86/setcc-combine.ll @@ -8,8 +8,8 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) { ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_eq_1: @@ -17,8 +17,7 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) { ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pextrb $4, %xmm0, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm0, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %A, %B %sext = sext <4 x i1> %cmp to <4 x i32> @@ -32,15 +31,14 @@ define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) { ; SSE2-LABEL: test_ne_1: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: pextrw $2, %xmm1, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_ne_1: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE41-NEXT: pextrb $4, %xmm1, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %A, %B %sext = sext <4 x i1> %cmp to <4 x i32> @@ -69,8 +67,8 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) { ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm0, 
%xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_ge_1: @@ -78,8 +76,7 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) { ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pextrb $4, %xmm0, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm0, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %A, %B %sext = sext <4 x i1> %cmp to <4 x i32> @@ -93,15 +90,14 @@ define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) { ; SSE2-LABEL: test_lt_1: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: pextrw $2, %xmm1, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_lt_1: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE41-NEXT: pextrb $4, %xmm1, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %A, %B %sext = sext <4 x i1> %cmp to <4 x i32> @@ -130,8 +126,8 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) { ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: pextrw $2, %xmm1, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_eq_2: @@ -139,8 +135,7 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) { ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrb $4, %xmm1, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %B, %A %sext = sext <4 x i1> %cmp to <4 x i32> @@ -154,15 +149,14 @@ define i32 @test_ne_2(<4 x i32> 
%A, <4 x i32> %B) { ; SSE2-LABEL: test_ne_2: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_ne_2: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: pextrb $4, %xmm0, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm0, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %B, %A %sext = sext <4 x i1> %cmp to <4 x i32> @@ -178,8 +172,8 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) { ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: pextrw $2, %xmm1, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_le_2: @@ -187,8 +181,7 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) { ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrb $4, %xmm1, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %B, %A %sext = sext <4 x i1> %cmp to <4 x i32> @@ -215,15 +208,14 @@ define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) { ; SSE2-LABEL: test_lt_2: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_lt_2: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: pextrb $4, %xmm0, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm0, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %B, %A %sext = sext <4 x i1> %cmp to <4 x i32> @@ -237,15 +229,14 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) { ; SSE2-LABEL: test_gt_2: ; SSE2: # 
%bb.0: ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: movsbl %al, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_gt_2: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: pextrb $4, %xmm0, %eax -; SSE41-NEXT: movsbl %al, %eax +; SSE41-NEXT: pextrd $1, %xmm0, %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %B, %A %sext = sext <4 x i1> %cmp to <4 x i32> -- 2.50.1