From 95bd49524cc067af783d2d765b4085939d5ddef0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 9 Feb 2019 20:34:59 +0000 Subject: [PATCH] [X86] CombineOr - fold to generic funnel shifts As discussed on D57389, this is a first step towards moving the SHLD/SHRD matching code to DAGCombiner using FSHL/FSHR instead. There's a bit of work to do before I can do that, so this just folds to FSHL/FSHR in the existing code (handling the different SHRD/FSHR argument ordering), which fixes the issue we had with i16 shift amounts not being correctly masked. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353626 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 43 +++++++++++++++--------------- test/CodeGen/X86/rot16.ll | 8 ++++-- test/CodeGen/X86/shift-double.ll | 8 ++++-- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 635407112cf..d96645f4433 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -37277,23 +37277,30 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, ShAmt1 = ShAmt1.getOperand(0); SDLoc DL(N); - unsigned Opc = X86ISD::SHLD; + unsigned Opc = ISD::FSHL; SDValue Op0 = N0.getOperand(0); SDValue Op1 = N1.getOperand(0); - if (ShAmt0.getOpcode() == ISD::SUB || - ShAmt0.getOpcode() == ISD::XOR) { - Opc = X86ISD::SHRD; + if (ShAmt0.getOpcode() == ISD::SUB || ShAmt0.getOpcode() == ISD::XOR) { + Opc = ISD::FSHR; std::swap(Op0, Op1); std::swap(ShAmt0, ShAmt1); std::swap(ShMsk0, ShMsk1); } - // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C ) - // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C ) - // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C ) - // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C ) - // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C ) - // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C ) + auto GetFunnelShift = [&DAG, &DL, VT, Opc](SDValue Op0, SDValue Op1, + SDValue Amt) { + if (Opc == ISD::FSHR) + std::swap(Op0, Op1); + return DAG.getNode(Opc, DL, VT, Op0, Op1, + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Amt)); + }; + + // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> FSHL( X, Y, C ) + // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> FSHR( Y, X, C ) + // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHL( X, Y, C ) + // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHR( Y, X, C ) + // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> FSHL( X, Y, C ) + // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> FSHR( Y, X, C ) if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (auto *SumC = dyn_cast(Sum)) { @@ -37303,20 +37310,16 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, if ((SumC->getAPIntValue() == Bits || (SumC->getAPIntValue() == 0 && ShMsk1)) && ShAmt1Op1 == ShAmt0) - return DAG.getNode(Opc, DL, VT, Op0, Op1, - DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + return GetFunnelShift(Op0, Op1, ShAmt0); } } else if (auto *ShAmt1C = dyn_cast(ShAmt1)) { auto *ShAmt0C = dyn_cast(ShAmt0); if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits) - return DAG.getNode(Opc, DL, VT, - N0.getOperand(0), N1.getOperand(0), - DAG.getNode(ISD::TRUNCATE, DL, - MVT::i8, ShAmt0)); + return GetFunnelShift(Op0, Op1, ShAmt0); } else if (ShAmt1.getOpcode() == ISD::XOR) { SDValue Mask = ShAmt1.getOperand(1); if (auto *MaskC = dyn_cast(Mask)) { - unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL); + unsigned InnerShift = (ISD::FSHL == Opc ? ISD::SRL : ISD::SHL); SDValue ShAmt1Op0 = ShAmt1.getOperand(0); if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE) ShAmt1Op0 = ShAmt1Op0.getOperand(0); @@ -37325,14 +37328,12 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, if (Op1.getOpcode() == InnerShift && isa(Op1.getOperand(1)) && Op1.getConstantOperandVal(1) == 1) { - return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0), - DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0); } // Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ). if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD && Op1.getOperand(0) == Op1.getOperand(1)) { - return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0), - DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0); } } } diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll index 69ea7013cc7..b0221c48532 100644 --- a/test/CodeGen/X86/rot16.ll +++ b/test/CodeGen/X86/rot16.ll @@ -28,9 +28,10 @@ define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind { define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind { ; X32-LABEL: bar: ; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: andb $15, %cl ; X32-NEXT: shldw %cl, %dx, %ax ; X32-NEXT: retl ; @@ -38,6 +39,7 @@ define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %esi, %eax +; X64-NEXT: andb $15, %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shldw %cl, %di, %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -75,9 +77,10 @@ define i16 @un(i16 %x, i16 %y, i16 %z) nounwind { define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind { ; X32-LABEL: bu: ; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: andb $15, %cl ; X32-NEXT: shrdw %cl, %dx, %ax ; X32-NEXT: retl ; @@ -85,6 +88,7 @@ define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %esi, %eax +; X64-NEXT: andb $15, %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrdw %cl, %di, %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll index fd555c4aaac..5e8efa7b9f0 100644 --- a/test/CodeGen/X86/shift-double.ll +++ b/test/CodeGen/X86/shift-double.ll @@ -128,9 +128,10 @@ define i32 @test4(i32 %A, i32 %B, i8 %C) nounwind { define i16 @test5(i16 %A, i16 %B, i8 %C) nounwind { ; X86-LABEL: test5: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: andb $15, %cl ; X86-NEXT: shldw %cl, %dx, %ax ; X86-NEXT: retl ; @@ -138,6 +139,7 @@ define i16 @test5(i16 %A, i16 %B, i8 %C) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $15, %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shldw %cl, %si, %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -181,9 +183,10 @@ define i32 @test6(i32 %A, i32 %B, i8 %C) nounwind { define i16 @test7(i16 %A, i16 %B, i8 %C) nounwind { ; X86-LABEL: test7: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: andb $15, %cl ; X86-NEXT: shrdw %cl, %dx, %ax ; X86-NEXT: retl ; @@ -191,6 +194,7 @@ define i16 @test7(i16 %A, i16 %B, i8 %C) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $15, %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrdw %cl, %si, %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax -- 2.40.0