From b8a9dcf26b8dc572897f8e058561ba6d74ec20a2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 10 Apr 2019 21:42:08 +0000
Subject: [PATCH] [X86] Teach foldMaskedShiftToScaledMask to look through an
 any_extend from i32 to i64 between the and & shl

foldMaskedShiftToScaledMask tries to reorder and & shl to enable the shl to
fold into an LEA. But if there is an any_extend between them it doesn't work.

This patch modifies the code to look through any_extend from i32 to i64 when
the and mask only uses bits that weren't from the extended part.

This will prevent a regression from D60358 caused by 64-bit SHL being
narrowed to 32-bits when their upper bits aren't demanded.

Differential Revision: https://reviews.llvm.org/D60532

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358139 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp        | 66 +++++++++++++++--------
 test/CodeGen/X86/fold-and-shift-x86_64.ll |  6 +--
 2 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index ff660a2423d..2e3655fd10e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1378,12 +1378,31 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
 // allows us to fold the shift into this addressing mode. Returns false if the
 // transform succeeded.
 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
-                                        SDValue Shift, SDValue X,
                                         X86ISelAddressMode &AM) {
+  SDValue Shift = N.getOperand(0);
+
+  // Use a signed mask so that shifting right will insert sign bits. These
+  // bits will be removed when we shift the result left so it doesn't matter
+  // what we use. This might allow a smaller immediate encoding.
+  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
+
+  // If we have an any_extend feeding the AND, look through it to see if there
+  // is a shift behind it. But only if the AND doesn't use the extended bits.
+  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
+  bool FoundAnyExtend = false;
+  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
+      Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
+      isUInt<32>(Mask)) {
+    FoundAnyExtend = true;
+    Shift = Shift.getOperand(0);
+  }
+
   if (Shift.getOpcode() != ISD::SHL ||
       !isa<ConstantSDNode>(Shift.getOperand(1)))
     return true;
 
+  SDValue X = Shift.getOperand(0);
+
   // Not likely to be profitable if either the AND or SHIFT node has more
   // than one use (unless all uses are for address computation). Besides,
   // isel mechanism requires their node ids to be reused.
@@ -1395,13 +1414,14 @@ static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
   if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
     return true;
 
-  // Use a signed mask so that shifting right will insert sign bits. These
-  // bits will be removed when we shift the result left so it doesn't matter
-  // what we use. This might allow a smaller immediate encoding.
-  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
-
   MVT VT = N.getSimpleValueType();
   SDLoc DL(N);
+  if (FoundAnyExtend) {
+    SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
+    insertDAGNode(DAG, N, NewX);
+    X = NewX;
+  }
+
   SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
   SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
@@ -1851,29 +1871,31 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
            "Unexpected value size!");
 
-    SDValue Shift = N.getOperand(0);
-    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
-    SDValue X = Shift.getOperand(0);
-
     if (!isa<ConstantSDNode>(N.getOperand(1)))
       break;
-    uint64_t Mask = N.getConstantOperandVal(1);
 
-    // Try to fold the mask and shift into an extract and scale.
-    if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
-      return false;
+    if (N.getOperand(0).getOpcode() == ISD::SRL) {
+      SDValue Shift = N.getOperand(0);
+      SDValue X = Shift.getOperand(0);
 
-    // Try to fold the mask and shift directly into the scale.
-    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
-      return false;
+      uint64_t Mask = N.getConstantOperandVal(1);
+
+      // Try to fold the mask and shift into an extract and scale.
+      if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+        return false;
+
+      // Try to fold the mask and shift directly into the scale.
+      if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+        return false;
+
+      // Try to fold the mask and shift into BEXTR and scale.
+      if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+        return false;
+    }
 
     // Try to swap the mask and shift to place shifts which can be done as
     // a scale on the outside of the mask.
-    if (!foldMaskedShiftToScaledMask(*CurDAG, N, Shift, X, AM))
-      return false;
-
-    // Try to fold the mask and shift into BEXTR and scale.
-    if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+    if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
       return false;
 
     break;
diff --git a/test/CodeGen/X86/fold-and-shift-x86_64.ll b/test/CodeGen/X86/fold-and-shift-x86_64.ll
index 5413e1bb1b3..c9c1adf8285 100644
--- a/test/CodeGen/X86/fold-and-shift-x86_64.ll
+++ b/test/CodeGen/X86/fold-and-shift-x86_64.ll
@@ -76,14 +76,12 @@ entry:
   ret i8 %tmp9
 }
 
-; FIXME should be able to fold shift into address.
 define i8 @t6(i8* %X, i32 %i) {
 ; CHECK-LABEL: t6:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    shll $2, %esi
-; CHECK-NEXT:    andl $60, %esi
-; CHECK-NEXT:    movb (%rdi,%rsi), %al
+; CHECK-NEXT:    andl $15, %esi
+; CHECK-NEXT:    movb (%rdi,%rsi,4), %al
 ; CHECK-NEXT:    retq
 entry:
   %tmp2 = shl i32 %i, 2
-- 
2.50.1
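Note (not part of the patch): the fold rewrites (x << C) & Mask into
(x & (Mask >> C)) << C so the shl can become the LEA scale, and the new code
additionally looks through an i32->i64 any_extend between the and & shl when
the mask fits in 32 bits. A minimal standalone C++ sketch checking that
identity on the t6 values above; the loop bound is illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  // t6: %tmp2 = shl i32 %i, 2, masked with 60, used as an i64 index.
  // Before the patch: shll $2, %esi / andl $60, %esi / movb (%rdi,%rsi)
  // After the patch:  andl $15, %esi / movb (%rdi,%rsi,4)
  for (uint32_t I = 0; I < (1u << 20); ++I) { // sampled range, not exhaustive
    uint64_t Before = static_cast<uint64_t>(I << 2) & 60; // and after shl
    uint64_t After = static_cast<uint64_t>(I & 15) << 2;  // shl after and
    assert(Before == After);
  }
  return 0;
}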