From b8a9dcf26b8dc572897f8e058561ba6d74ec20a2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 10 Apr 2019 21:42:08 +0000
Subject: [PATCH] [X86] Teach foldMaskedShiftToScaledMask to look through an
 any_extend from i32 to i64 between the and & shl

foldMaskedShiftToScaledMask tries to reorder and & shl to enable the shl to
fold into an LEA. But if there is an any_extend between them it doesn't work.

This patch modifies the code to look through any_extend from i32 to i64 when
the and mask only uses bits that weren't from the extended part.

This will prevent a regression from D60358 caused by 64-bit SHL being
narrowed to 32-bits when their upper bits aren't demanded.

Differential Revision: https://reviews.llvm.org/D60532

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358139 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp        | 66 +++++++++++++++--------
 test/CodeGen/X86/fold-and-shift-x86_64.ll |  6 +--
 2 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index ff660a2423d..2e3655fd10e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1378,12 +1378,31 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
 // allows us to fold the shift into this addressing mode. Returns false if the
 // transform succeeded.
 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
-                                        SDValue Shift, SDValue X,
                                         X86ISelAddressMode &AM) {
+  SDValue Shift = N.getOperand(0);
+
+  // Use a signed mask so that shifting right will insert sign bits. These
+  // bits will be removed when we shift the result left so it doesn't matter
+  // what we use. This might allow a smaller immediate encoding.
+  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
+
+  // If we have an any_extend feeding the AND, look through it to see if there
+  // is a shift behind it. But only if the AND doesn't use the extended bits.
+  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
+  bool FoundAnyExtend = false;
+  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
+      Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
+      isUInt<32>(Mask)) {
+    FoundAnyExtend = true;
+    Shift = Shift.getOperand(0);
+  }
+
   if (Shift.getOpcode() != ISD::SHL ||
       !isa<ConstantSDNode>(Shift.getOperand(1)))
     return true;
 
+  SDValue X = Shift.getOperand(0);
+
   // Not likely to be profitable if either the AND or SHIFT node has more
   // than one use (unless all uses are for address computation). Besides,
   // isel mechanism requires their node ids to be reused.
@@ -1395,13 +1414,14 @@ static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
   if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
     return true;
 
-  // Use a signed mask so that shifting right will insert sign bits. These
-  // bits will be removed when we shift the result left so it doesn't matter
-  // what we use. This might allow a smaller immediate encoding.
-  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
-
   MVT VT = N.getSimpleValueType();
   SDLoc DL(N);
+  if (FoundAnyExtend) {
+    SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
+    insertDAGNode(DAG, N, NewX);
+    X = NewX;
+  }
+
   SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
   SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
@@ -1851,29 +1871,31 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
            "Unexpected value size!");
 
-    SDValue Shift = N.getOperand(0);
-    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
-    SDValue X = Shift.getOperand(0);
-
     if (!isa<ConstantSDNode>(N.getOperand(1)))
       break;
-    uint64_t Mask = N.getConstantOperandVal(1);
 
-    // Try to fold the mask and shift into an extract and scale.
-    if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
-      return false;
+    if (N.getOperand(0).getOpcode() == ISD::SRL) {
+      SDValue Shift = N.getOperand(0);
+      SDValue X = Shift.getOperand(0);
 
-    // Try to fold the mask and shift directly into the scale.
-    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
-      return false;
+      uint64_t Mask = N.getConstantOperandVal(1);
+
+      // Try to fold the mask and shift into an extract and scale.
+      if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+        return false;
+
+      // Try to fold the mask and shift directly into the scale.
+      if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+        return false;
+
+      // Try to fold the mask and shift into BEXTR and scale.
+      if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+        return false;
+    }
 
     // Try to swap the mask and shift to place shifts which can be done as
     // a scale on the outside of the mask.
-    if (!foldMaskedShiftToScaledMask(*CurDAG, N, Shift, X, AM))
-      return false;
-
-    // Try to fold the mask and shift into BEXTR and scale.
-    if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+    if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
       return false;
 
     break;
diff --git a/test/CodeGen/X86/fold-and-shift-x86_64.ll b/test/CodeGen/X86/fold-and-shift-x86_64.ll
index 5413e1bb1b3..c9c1adf8285 100644
--- a/test/CodeGen/X86/fold-and-shift-x86_64.ll
+++ b/test/CodeGen/X86/fold-and-shift-x86_64.ll
@@ -76,14 +76,12 @@ entry:
   ret i8 %tmp9
 }
 
-; FIXME should be able to fold shift into address.
 define i8 @t6(i8* %X, i32 %i) {
 ; CHECK-LABEL: t6:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    shll $2, %esi
-; CHECK-NEXT:    andl $60, %esi
-; CHECK-NEXT:    movb (%rdi,%rsi), %al
+; CHECK-NEXT:    andl $15, %esi
+; CHECK-NEXT:    movb (%rdi,%rsi,4), %al
 ; CHECK-NEXT:    retq
 entry:
   %tmp2 = shl i32 %i, 2
-- 
2.50.1
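Note (not part of the patch): the fold rewrites (x << C) & Mask into
(x & (Mask >> C)) << C so the shl can become the LEA scale, and the new code
additionally looks through an i32->i64 any_extend between the and & shl when
the mask fits in 32 bits. A minimal standalone C++ sketch checking that
identity on the t6 values above; the loop bound is illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  // t6: %tmp2 = shl i32 %i, 2, masked with 60, used as an i64 index.
  // Before the patch: shll $2, %esi / andl $60, %esi / movb (%rdi,%rsi)
  // After the patch:  andl $15, %esi / movb (%rdi,%rsi,4)
  for (uint32_t I = 0; I < (1u << 20); ++I) { // sampled range, not exhaustive
    uint64_t Before = static_cast<uint64_t>(I << 2) & 60; // and after shl
    uint64_t After = static_cast<uint64_t>(I & 15) << 2;  // shl after and
    assert(Before == After);
  }
  return 0;
}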