From 0c1db381181917d0dcc58ac8930611ce3ea4e1cc Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 19 Jul 2019 08:26:13 +0000 Subject: [PATCH] [InstCombine] Dropping redundant masking before left-shift [1/5] (PR42563) Summary: If we have some pattern that leaves only some low bits set, and then performs a left-shift of those bits, if none of the bits that are left after the final shift are modified by the mask, we can omit the mask. There are many variants to this pattern: b. `(x & (~(-1 << MaskShAmt))) << ShiftShAmt` All these patterns can be simplified to just: `x << ShiftShAmt` iff: b. `(MaskShAmt+ShiftShAmt) u>= bitwidth(x)` Alive proof: b: https://rise4fun.com/Alive/y8M For now let's start with patterns where both shift amounts are variable, with a trivial constant "offset" between them, since I believe this is both the simplest to handle and the most common case. But again, there are likely other variants where we could use ValueTracking/ConstantRange to handle more cases. https://bugs.llvm.org/show_bug.cgi?id=42563 Differential Revision: https://reviews.llvm.org/D64514 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366536 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineShifts.cpp | 7 ++++-- ...dant-left-shift-input-masking-variant-b.ll | 22 +++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index bac4c07cf82..8ffdb661e32 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -71,10 +71,11 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, // // There are many variants to this pattern: // a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt +// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt // All these patterns can be simplified to just: // x << ShiftShAmt // iff: -// a) (MaskShAmt+ShiftShAmt) u>= bitwidth(x) +// a,b) 
(MaskShAmt+ShiftShAmt) u>= bitwidth(x) static Instruction * dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, const SimplifyQuery &SQ) { @@ -88,9 +89,11 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // ((1 << MaskShAmt) - 1) auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes()); + // (~(-1 << maskNbits)) + auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes()); Value *X; - if (!match(Masked, m_c_And(MaskA, m_Value(X)))) + if (!match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) return nullptr; // Can we simplify (MaskShAmt+ShiftShAmt) ? diff --git a/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll b/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll index ac415438d68..347010dd106 100644 --- a/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll +++ b/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll @@ -25,7 +25,7 @@ define i32 @t0_basic(i32 %x, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: call void @use32(i32 [[T2]]) ; CHECK-NEXT: call void @use32(i32 [[T3]]) -; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]] ; CHECK-NEXT: ret i32 [[T4]] ; %t0 = shl i32 -1, %nbits @@ -50,7 +50,7 @@ define i32 @t1_bigger_shift(i32 %x, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: call void @use32(i32 [[T2]]) ; CHECK-NEXT: call void @use32(i32 [[T3]]) -; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]] ; CHECK-NEXT: ret i32 [[T4]] ; %t0 = shl i32 -1, %nbits @@ -77,7 +77,7 @@ define i32 @t2_bigger_mask(i32 %x, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[T2]]) ; CHECK-NEXT: call void @use32(i32 [[T3]]) ; CHECK-NEXT: call void @use32(i32 [[T4]]) -; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T3]], [[T4]] +; CHECK-NEXT: [[T5:%.*]] = shl i32 [[X]], [[T4]] ; 
CHECK-NEXT: ret i32 [[T5]] ; %t0 = add i32 %nbits, 1 @@ -109,7 +109,7 @@ define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]]) -; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]] +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]] ; CHECK-NEXT: ret <3 x i32> [[T5]] ; %t0 = add <3 x i32> %nbits, @@ -138,7 +138,7 @@ define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]]) -; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]] +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]] ; CHECK-NEXT: ret <3 x i32> [[T5]] ; %t0 = add <3 x i32> %nbits, @@ -166,7 +166,7 @@ define <3 x i32> @t5_vec_undef(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T4]]) -; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[T3]], [[T4]] +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]] ; CHECK-NEXT: ret <3 x i32> [[T5]] ; %t0 = add <3 x i32> %nbits, @@ -198,7 +198,7 @@ define i32 @t6_commutativity0(i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: call void @use32(i32 [[T2]]) ; CHECK-NEXT: call void @use32(i32 [[T3]]) -; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]] ; CHECK-NEXT: ret i32 [[T4]] ; %x = call i32 @gen32() @@ -260,7 +260,7 @@ define i32 @t8_commutativity2(i32 %nbits0, i32 %nbits1) { ; CHECK-NEXT: call void @use32(i32 [[T3]]) ; CHECK-NEXT: call void @use32(i32 [[T4]]) ; CHECK-NEXT: call void @use32(i32 [[T5]]) -; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T4]], [[T5]] +; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T1]], [[T5]] ; CHECK-NEXT: ret i32 [[T6]] 
; %t0 = shl i32 -1, %nbits0 @@ -291,7 +291,7 @@ define i32 @t9_nuw(i32 %x, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: call void @use32(i32 [[T2]]) ; CHECK-NEXT: call void @use32(i32 [[T3]]) -; CHECK-NEXT: [[T4:%.*]] = shl nuw i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]] ; CHECK-NEXT: ret i32 [[T4]] ; %t0 = shl i32 -1, %nbits @@ -316,7 +316,7 @@ define i32 @t10_nsw(i32 %x, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: call void @use32(i32 [[T2]]) ; CHECK-NEXT: call void @use32(i32 [[T3]]) -; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]] ; CHECK-NEXT: ret i32 [[T4]] ; %t0 = shl i32 -1, %nbits @@ -341,7 +341,7 @@ define i32 @t11_nuw_nsw(i32 %x, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[T1]]) ; CHECK-NEXT: call void @use32(i32 [[T2]]) ; CHECK-NEXT: call void @use32(i32 [[T3]]) -; CHECK-NEXT: [[T4:%.*]] = shl nuw nsw i32 [[T2]], [[T3]] +; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]] ; CHECK-NEXT: ret i32 [[T4]] ; %t0 = shl i32 -1, %nbits -- 2.40.0