From: Sanjay Patel Date: Wed, 17 Apr 2019 22:38:51 +0000 (+0000) Subject: [x86] try to widen 'shl' as part of LEA formation X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=495f55e1725edda5627fc71ca0d9b8347acca80a;p=llvm [x86] try to widen 'shl' as part of LEA formation The test file has pairs of tests that are logically equivalent: https://rise4fun.com/Alive/2zQ %t4 = and i8 %t1, 8 %t5 = zext i8 %t4 to i16 %sh = shl i16 %t5, 2 %t6 = add i16 %sh, %t0 => %t4 = and i8 %t1, 8 %sh2 = shl i8 %t4, 2 %z5 = zext i8 %sh2 to i16 %t6 = add i16 %z5, %t0 ...so if we can fold the shift op into LEA in the 1st pattern, then we should be able to do the same in the 2nd pattern (unnecessary 'movzbl' is a separate bug I think). We don't want to do this any sooner though because that would conflict with generic transforms that try to narrow the width of the shift. Differential Revision: https://reviews.llvm.org/D60789 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358622 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 82669300309..78e2d22bf57 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1906,6 +1906,42 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; } + case ISD::ZERO_EXTEND: { + // Try to widen a zexted shift left to the same size as its use, so we can + // match the shift as a scale factor. + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) + break; + if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse()) + break; + + // Give up if the shift is not a valid scale factor [1,2,3]. + SDValue Shl = N.getOperand(0); + auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1)); + if (!ShAmtC || ShAmtC->getZExtValue() > 3) + break; + + // The narrow shift must only shift out zero bits (it must be 'nuw'). + // That makes it safe to widen to the destination type. 
+ APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(), + ShAmtC->getZExtValue()); + if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros)) + break; + + // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C) + MVT VT = N.getSimpleValueType(); + SDLoc DL(N); + SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0)); + SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1)); + + // Convert the shift to scale factor. + AM.Scale = 1 << ShAmtC->getZExtValue(); + AM.IndexReg = Zext; + + insertDAGNode(*CurDAG, N, Zext); + insertDAGNode(*CurDAG, N, NewShl); + CurDAG->ReplaceAllUsesWith(N, NewShl); + return false; + } } return matchAddressBase(N, AM); diff --git a/test/CodeGen/X86/lea-dagdag.ll b/test/CodeGen/X86/lea-dagdag.ll index f1e9d60e648..39086198d36 100644 --- a/test/CodeGen/X86/lea-dagdag.ll +++ b/test/CodeGen/X86/lea-dagdag.ll @@ -21,10 +21,10 @@ define i16 @and_i8_zext_shl_add_i16(i16 %t0, i8 %t1) { define i16 @and_i8_shl_zext_add_i16(i16 %t0, i8 %t1) { ; CHECK-LABEL: and_i8_shl_zext_add_i16: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: andb $8, %sil -; CHECK-NEXT: shlb $2, %sil ; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: leal (%rdi,%rax,4), %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %t4 = and i8 %t1, 8 @@ -52,10 +52,10 @@ define i32 @and_i8_zext_shl_add_i32(i32 %t0, i8 %t1) { define i32 @and_i8_shl_zext_add_i32(i32 %t0, i8 %t1) { ; CHECK-LABEL: and_i8_shl_zext_add_i32: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: andb $8, %sil -; CHECK-NEXT: shlb $3, %sil ; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: leal (%rdi,%rax,8), %eax ; CHECK-NEXT: retq %t4 = and i8 %t1, 8 %sh = shl i8 %t4, 3 @@ -112,9 +112,8 @@ define i64 @and_i8_shl_zext_add_i64(i64 %t0, i8 %t1) { ; CHECK-LABEL: and_i8_shl_zext_add_i64: ; CHECK: # 
%bb.0: ; CHECK-NEXT: andb $8, %sil -; CHECK-NEXT: addb %sil, %sil ; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: leaq (%rdi,%rax,2), %rax ; CHECK-NEXT: retq %t4 = and i8 %t1, 8 %sh = shl i8 %t4, 1 @@ -142,8 +141,7 @@ define i64 @and_i32_shl_zext_add_i64(i64 %t0, i32 %t1) { ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $esi killed $esi def $rsi ; CHECK-NEXT: andl $8, %esi -; CHECK-NEXT: leal (,%rsi,8), %eax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: leaq (%rdi,%rsi,8), %rax ; CHECK-NEXT: retq %t4 = and i32 %t1, 8 %sh = shl i32 %t4, 3 @@ -152,6 +150,8 @@ define i64 @and_i32_shl_zext_add_i64(i64 %t0, i32 %t1) { ret i64 %t6 } +; Negative test - shift can't be converted to scale factor. + define i64 @and_i32_zext_shl_add_i64_overshift(i64 %t0, i32 %t1) { ; CHECK-LABEL: and_i32_zext_shl_add_i64_overshift: ; CHECK: # %bb.0: