From: Nikita Popov Date: Sun, 17 Mar 2019 15:45:38 +0000 (+0000) Subject: [DAGCombine] Fold (x & ~y) | y patterns X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7cb598a0d7a702c3a23f4c14a8c1a5437951e8f6;p=llvm [DAGCombine] Fold (x & ~y) | y patterns Fold (x & ~y) | y and its four commuted variants to x | y. This pattern can in particular appear when a vselect c, x, -1 is expanded to (x & ~c) | (-1 & c) and combined to (x & ~c) | c. This change has some overlap with D59066, which avoids creating a vselect of this form in the first place during uaddsat expansion. Differential Revision: https://reviews.llvm.org/D59174 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356333 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 98eb1a1bfe7..43a1005d84a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5279,6 +5279,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { return SDValue(); } +/// OR combines for which the commuted variant will be tried as well. 
+static SDValue visitORCommutative( + SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { + EVT VT = N0.getValueType(); + if (N0.getOpcode() == ISD::AND) { + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) + if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); + + // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) + if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); + } + + return SDValue(); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5426,6 +5443,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } + if (SDValue Combined = visitORCommutative(DAG, N0, N1, N)) + return Combined; + if (SDValue Combined = visitORCommutative(DAG, N1, N0, N)) + return Combined; + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) diff --git a/test/CodeGen/AArch64/sat-add.ll b/test/CodeGen/AArch64/sat-add.ll index 6483001ed9c..36e63f3594b 100644 --- a/test/CodeGen/AArch64/sat-add.ll +++ b/test/CodeGen/AArch64/sat-add.ll @@ -454,8 +454,7 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) { ; CHECK-NEXT: movi v1.4s, #42 ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %a = add <4 x i32> %x, %c = icmp ugt <4 x i32> %x, %a @@ -470,8 +469,7 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) { ; CHECK-NEXT: mvni v2.4s, #42 ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; 
CHECK-NEXT: ret %a = add <4 x i32> %x, %c = icmp ugt <4 x i32> %x, @@ -503,8 +501,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) { ; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: add v1.2d, v0.2d, v1.2d ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %a = add <2 x i64> %x, %c = icmp ugt <2 x i64> %x, %a @@ -521,8 +518,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) { ; CHECK-NEXT: dup v2.2d, x9 ; CHECK-NEXT: add v1.2d, v0.2d, v1.2d ; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %a = add <2 x i64> %x, %c = icmp ugt <2 x i64> %x, @@ -637,8 +633,7 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i ; CHECK: // %bb.0: ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %a = add <4 x i32> %x, %y %c = icmp ugt <4 x i32> %x, %a @@ -652,8 +647,7 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 ; CHECK-NEXT: mvn v2.16b, v1.16b ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %noty = xor <4 x i32> %y, %a = add <4 x i32> %x, %y @@ -682,8 +676,7 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i ; CHECK: // %bb.0: ; CHECK-NEXT: add v1.2d, v0.2d, v1.2d ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; 
CHECK-NEXT: ret %a = add <2 x i64> %x, %y %c = icmp ugt <2 x i64> %x, %a @@ -697,8 +690,7 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 ; CHECK-NEXT: mvn v2.16b, v1.16b ; CHECK-NEXT: add v1.2d, v0.2d, v1.2d ; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d -; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %noty = xor <2 x i64> %y, %a = add <2 x i64> %x, %y diff --git a/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll b/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll index f3dd80f71fe..fdb5965db16 100644 --- a/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ b/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -359,8 +359,7 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: out_constant_varx_mone_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w0, w2 -; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: orr w0, w0, w2 ; CHECK-NEXT: ret %notmask = xor i32 %mask, -1 %mx = and i32 %notmask, %x @@ -442,8 +441,7 @@ define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: out_constant_mone_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w1, w2 -; CHECK-NEXT: orr w0, w2, w8 +; CHECK-NEXT: orr w0, w1, w2 ; CHECK-NEXT: ret %notmask = xor i32 %mask, -1 %mx = and i32 %mask, -1 diff --git a/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll b/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll index ee150f1e5bd..0e2c891816c 100644 --- a/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll +++ b/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll @@ -34,7 +34,6 @@ define <4 x i32> @in_constant_varx_mone(<4 x i32> %x, <4 x i32> %y, <4 x i32> 
%m define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { ; CHECK-LABEL: out_constant_varx_mone_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %notmask = xor <4 x i32> %mask, @@ -119,8 +118,7 @@ define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { ; CHECK-LABEL: out_constant_mone_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b -; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %notmask = xor <4 x i32> %mask, %mx = and <4 x i32> %mask, @@ -132,8 +130,7 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> % define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { ; CHECK-LABEL: in_constant_mone_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b -; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <4 x i32> , %y ; %x %n1 = and <4 x i32> %n0, %mask diff --git a/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll b/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll index b4065c219ad..1f125d9ec7a 100644 --- a/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll +++ b/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll @@ -587,15 +587,13 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: out_constant_varx_mone_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edx, %eax -; CHECK-NOBMI-NEXT: notl %eax -; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: orl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out_constant_varx_mone_invmask: ; 
CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %edi, %edx, %eax +; CHECK-BMI-NEXT: movl %edi, %eax ; CHECK-BMI-NEXT: orl %edx, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 @@ -722,15 +720,13 @@ define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: out_constant_mone_vary: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edx, %eax -; CHECK-NOBMI-NEXT: notl %eax -; CHECK-NOBMI-NEXT: andl %esi, %eax +; CHECK-NOBMI-NEXT: movl %esi, %eax ; CHECK-NOBMI-NEXT: orl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out_constant_mone_vary: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %esi, %edx, %eax +; CHECK-BMI-NEXT: movl %esi, %eax ; CHECK-BMI-NEXT: orl %edx, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 diff --git a/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll index 7cb0d3ff58f..08979d277ec 100644 --- a/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll +++ b/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll @@ -85,26 +85,21 @@ define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, ; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 -; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1 -; CHECK-SSE1-NEXT: orps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi) +; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0 +; CHECK-SSE1-NEXT: orps (%rcx), %xmm0 +; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1 -; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0 -; CHECK-SSE2-NEXT: andnps (%rdi), %xmm0 -; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 +; CHECK-SSE2-NEXT: movaps (%rdi), 
%xmm0 +; CHECK-SSE2-NEXT: orps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_constant_varx_mone_invmask: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0 -; CHECK-XOP-NEXT: vandnps (%rdi), %xmm0, %xmm1 -; CHECK-XOP-NEXT: vorps %xmm0, %xmm1, %xmm0 +; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16 @@ -311,26 +306,21 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i3 ; CHECK-SSE1-LABEL: out_constant_mone_vary: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 -; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1 -; CHECK-SSE1-NEXT: orps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi) +; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0 +; CHECK-SSE1-NEXT: orps (%rcx), %xmm0 +; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_mone_vary: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1 -; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0 -; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 -; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 +; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0 +; CHECK-SSE2-NEXT: orps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_constant_mone_vary: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0 -; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1 -; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 +; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0 +; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16 @@ -355,17 +345,14 @@ define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32 ; ; CHECK-SSE2-LABEL: in_constant_mone_vary: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1 -; CHECK-SSE2-NEXT: 
movaps %xmm1, %xmm0 -; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 -; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 +; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0 +; CHECK-SSE2-NEXT: orps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_mone_vary: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0 -; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1 -; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 +; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0 +; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16