[DAGCombiner] improve throughput of shift+logic+shift

author Sanjay Patel <spatel@rotateright.com>

Sun, 1 Sep 2019 18:38:15 +0000 (18:38 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Sun, 1 Sep 2019 18:38:15 +0000 (18:38 +0000)
author Sanjay Patel <spatel@rotateright.com>
Sun, 1 Sep 2019 18:38:15 +0000 (18:38 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Sun, 1 Sep 2019 18:38:15 +0000 (18:38 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index cefcaa472e0f9fdb95d1a544757cdf0b71a53029..aa69ea563b4e1aa1811db5fedbc4118fc2339481 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7204,6 +7204,72 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
    return SDValue();
  }
  
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+  // Match a one-use bitwise logic op.
+  SDValue LogicOp = Shift->getOperand(0);
+  if (!LogicOp.hasOneUse())
+    return SDValue();
+
+  unsigned LogicOpcode = LogicOp.getOpcode();
+  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+      LogicOpcode != ISD::XOR)
+    return SDValue();
+
+  // Find a matching one-use shift by constant.
+  unsigned ShiftOpcode = Shift->getOpcode();
+  SDValue C1 = Shift->getOperand(1);
+  ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+  assert(C1Node && "Expected a shift with constant operand");
+  const APInt &C1Val = C1Node->getAPIntValue();
+  auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+                             const APInt *&ShiftAmtVal) {
+    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+      return false;
+
+    ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+    if (!ShiftCNode)
+      return false;
+
+    // Capture the shifted operand and shift amount value.
+    ShiftOp = V.getOperand(0);
+    ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+    // Shift amount types do not have to match their operand type, so check that
+    // the constants are the same width.
+    if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+      return false;
+
+    // The fold is not valid if the sum of the shift values exceeds bitwidth.
+    if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+      return false;
+
+    return true;
+  };
+
+  // Logic ops are commutative, so check each operand for a match.
+  SDValue X, Y;
+  const APInt *C0Val;
+  if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+    Y = LogicOp.getOperand(1);
+  else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+    Y = LogicOp.getOperand(0);
+  else
+    return SDValue();
+
+  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+  SDLoc DL(Shift);
+  EVT VT = Shift->getValueType(0);
+  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+  SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
  /// Handle transforms common to the three shifts, when the shift amount is a
  /// constant.
  /// We are looking for: (shift being one of shl/sra/srl)
@@ -7222,6 +7288,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
    if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
      return SDValue();
  
+  // TODO: This is limited to early combining because it may reveal regressions
+  //       otherwise. But since we just checked a target hook to see if this is
+  //       desirable, that should have filtered out cases where this interferes
+  //       with some other pattern matching.
+  if (!LegalTypes)
+    if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+      return R;
+
    // We want to pull some binops through shifts, so that we have (and (shift))
    // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
    // thing happens with address calculations, so it's important to canonicalize
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll

index 0753113fc11170c59ad2e6f7f640a42d6ab44b55..3015844c9cbed6b6f4b986904b5ae24f9990be37 100644 (file)
--- a/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -265,12 +265,12 @@ define void @test_32bit_opnd1_better(i32* %existing, i32* %new) {
  define i32 @test_nouseful_bits(i8 %a, i32 %b) {
  ; CHECK-LABEL: test_nouseful_bits:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    bfi w8, w8, #8, #24
-; CHECK-NEXT:    mov w9, w0
-; CHECK-NEXT:    bfi w9, w8, #8, #24
-; CHECK-NEXT:    bfi w0, w9, #8, #24
-; CHECK-NEXT:    lsl w0, w0, #8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    lsl w8, w8, #8
+; CHECK-NEXT:    mov w9, w8
+; CHECK-NEXT:    bfxil w9, w0, #0, #8
+; CHECK-NEXT:    bfi w8, w9, #16, #16
+; CHECK-NEXT:    mov w0, w8
  ; CHECK-NEXT:    ret
    %conv = zext i8 %a to i32     ;   0  0  0  A
    %shl = shl i32 %b, 8          ;   B2 B1 B0 0
diff --git a/test/CodeGen/AArch64/shift-logic.ll b/test/CodeGen/AArch64/shift-logic.ll

index 4d0d7fdf6ac38b04609966eaaf6eac985775e577..af684bbb8aff7c2f8bc7df22ec1c171f2e48d82c 100644 (file)
--- a/test/CodeGen/AArch64/shift-logic.ll
+++ b/test/CodeGen/AArch64/shift-logic.ll
@@ -4,8 +4,8 @@
  define i8 @shl_and(i8 %x, i8 %y) nounwind {
  ; CHECK-LABEL: shl_and:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, w0, lsl #3
-; CHECK-NEXT:    lsl w0, w8, #2
+; CHECK-NEXT:    lsl w8, w0, #5
+; CHECK-NEXT:    and w0, w8, w1, lsl #2
  ; CHECK-NEXT:    ret
    %sh0 = shl i8 %x, 3
    %r = and i8 %sh0, %y
@@ -16,8 +16,8 @@ define i8 @shl_and(i8 %x, i8 %y) nounwind {
  define i16 @shl_or(i16 %x, i16 %y) nounwind {
  ; CHECK-LABEL: shl_or:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0, lsl #5
-; CHECK-NEXT:    lsl w0, w8, #7
+; CHECK-NEXT:    lsl w8, w0, #12
+; CHECK-NEXT:    orr w0, w8, w1, lsl #7
  ; CHECK-NEXT:    ret
    %sh0 = shl i16 %x, 5
    %r = or i16 %y, %sh0
@@ -28,8 +28,8 @@ define i16 @shl_or(i16 %x, i16 %y) nounwind {
  define i32 @shl_xor(i32 %x, i32 %y) nounwind {
  ; CHECK-LABEL: shl_xor:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w1, w0, lsl #5
-; CHECK-NEXT:    lsl w0, w8, #7
+; CHECK-NEXT:    lsl w8, w0, #12
+; CHECK-NEXT:    eor w0, w8, w1, lsl #7
  ; CHECK-NEXT:    ret
    %sh0 = shl i32 %x, 5
    %r = xor i32 %sh0, %y
@@ -40,8 +40,8 @@ define i32 @shl_xor(i32 %x, i32 %y) nounwind {
  define i64 @lshr_and(i64 %x, i64 %y) nounwind {
  ; CHECK-LABEL: lshr_and:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and x8, x1, x0, lsr #5
-; CHECK-NEXT:    lsr x0, x8, #7
+; CHECK-NEXT:    lsr x8, x0, #12
+; CHECK-NEXT:    and x0, x8, x1, lsr #7
  ; CHECK-NEXT:    ret
    %sh0 = lshr i64 %x, 5
    %r = and i64 %y, %sh0
@@ -52,9 +52,9 @@ define i64 @lshr_and(i64 %x, i64 %y) nounwind {
  define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) nounwind {
  ; CHECK-LABEL: lshr_or:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #5
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #7
+; CHECK-NEXT:    ushr v0.4s, v0.4s, #12
  ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #7
  ; CHECK-NEXT:    ret
    %sh0 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
    %r = or <4 x i32> %sh0, %y
@@ -65,9 +65,9 @@ define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) nounwind {
  define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %y) nounwind {
  ; CHECK-LABEL: lshr_xor:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.8h, v0.8h, #5
-; CHECK-NEXT:    eor v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    ushr v0.8h, v0.8h, #7
+; CHECK-NEXT:    ushr v1.8h, v1.8h, #7
+; CHECK-NEXT:    ushr v0.8h, v0.8h, #12
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
  ; CHECK-NEXT:    ret
    %sh0 = lshr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
    %r = xor <8 x i16> %y, %sh0
@@ -79,9 +79,9 @@ define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %y) nounwind {
  define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %y) nounwind {
  ; CHECK-LABEL: ashr_and:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.16b, v0.16b, #3
-; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    sshr v0.16b, v0.16b, #2
+; CHECK-NEXT:    sshr v1.16b, v1.16b, #2
+; CHECK-NEXT:    sshr v0.16b, v0.16b, #5
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
  ; CHECK-NEXT:    ret
    %sh0 = ashr <16 x i8> %x, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    %r = and <16 x i8> %y, %sh0
@@ -92,9 +92,9 @@ define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %y) nounwind {
  define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) nounwind {
  ; CHECK-LABEL: ashr_or:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.2d, v0.2d, #5
+; CHECK-NEXT:    sshr v1.2d, v1.2d, #7
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #12
  ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    sshr v0.2d, v0.2d, #7
  ; CHECK-NEXT:    ret
    %sh0 = ashr <2 x i64> %x, <i64 5, i64 5>
    %r = or <2 x i64> %sh0, %y
@@ -105,8 +105,8 @@ define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) nounwind {
  define i32 @ashr_xor(i32 %x, i32 %y) nounwind {
  ; CHECK-LABEL: ashr_xor:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w1, w0, asr #5
-; CHECK-NEXT:    asr w0, w8, #7
+; CHECK-NEXT:    asr w8, w0, #12
+; CHECK-NEXT:    eor w0, w8, w1, asr #7
  ; CHECK-NEXT:    ret
    %sh0 = ashr i32 %x, 5
    %r = xor i32 %y, %sh0
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll

index ac234e824016c77cb44bdc6aed472bf0d701c87d..b28f4542cf3d22e01ffd98579f8d1d8ea8ccd0fd 100644 (file)
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -15,10 +15,10 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
  
  ; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc.
  ; CHECK: bl _f2
-; CHECK: cmp {{r[0-9]+}}, #0
-; CHECK-NEXT: it       eq
-; CHECK-NEXT: addeq    {{r[0-9]+}}, #1
-; CHECK-NEXT: lsls
+; CHECK: clz {{r[0-9]+}}
+; CHECK-DAG: lsrs    {{r[0-9]+}}
+; CHECK-DAG: lsls    {{r[0-9]+}}
+; CHECK-NEXT: orr.w   {{r[0-9]+}}
  ; CHECK-NEXT: InlineAsm Start
  define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
  entry:
diff --git a/test/CodeGen/X86/shift-logic.ll b/test/CodeGen/X86/shift-logic.ll

index b2f26a2c4078b315ec06df0adc0b98d8bebb3036..12e2328f7ce87737a453d0e6824316d8080d9d07 100644 (file)
--- a/test/CodeGen/X86/shift-logic.ll
+++ b/test/CodeGen/X86/shift-logic.ll
@@ -4,10 +4,10 @@
  define i8 @shl_and(i8 %x, i8 %y) nounwind {
  ; CHECK-LABEL: shl_and:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal (,%rdi,8), %eax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shlb $2, %sil
+; CHECK-NEXT:    shlb $5, %al
  ; CHECK-NEXT:    andb %sil, %al
-; CHECK-NEXT:    shlb $2, %al
  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
  ; CHECK-NEXT:    retq
    %sh0 = shl i8 %x, 3
@@ -20,9 +20,9 @@ define i16 @shl_or(i16 %x, i16 %y) nounwind {
  ; CHECK-LABEL: shl_or:
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shll $5, %eax
+; CHECK-NEXT:    shll $7, %esi
+; CHECK-NEXT:    shll $12, %eax
  ; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    shll $7, %eax
  ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
  ; CHECK-NEXT:    retq
    %sh0 = shl i16 %x, 5
@@ -35,9 +35,9 @@ define i32 @shl_xor(i32 %x, i32 %y) nounwind {
  ; CHECK-LABEL: shl_xor:
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shll $5, %eax
+; CHECK-NEXT:    shll $7, %esi
+; CHECK-NEXT:    shll $12, %eax
  ; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    shll $7, %eax
  ; CHECK-NEXT:    retq
    %sh0 = shl i32 %x, 5
    %r = xor i32 %sh0, %y
@@ -49,9 +49,9 @@ define i64 @lshr_and(i64 %x, i64 %y) nounwind {
  ; CHECK-LABEL: lshr_and:
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    shrq $5, %rax
+; CHECK-NEXT:    shrq $7, %rsi
+; CHECK-NEXT:    shrq $12, %rax
  ; CHECK-NEXT:    andq %rsi, %rax
-; CHECK-NEXT:    shrq $7, %rax
  ; CHECK-NEXT:    retq
    %sh0 = lshr i64 %x, 5
    %r = and i64 %y, %sh0
@@ -62,9 +62,9 @@ define i64 @lshr_and(i64 %x, i64 %y) nounwind {
  define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) nounwind {
  ; CHECK-LABEL: lshr_or:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    psrld $5, %xmm0
+; CHECK-NEXT:    psrld $7, %xmm1
+; CHECK-NEXT:    psrld $12, %xmm0
  ; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    psrld $7, %xmm0
  ; CHECK-NEXT:    retq
    %sh0 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
    %r = or <4 x i32> %sh0, %y
@@ -75,9 +75,9 @@ define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) nounwind {
  define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %y) nounwind {
  ; CHECK-LABEL: lshr_xor:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    psrlw $5, %xmm0
+; CHECK-NEXT:    psrlw $7, %xmm1
+; CHECK-NEXT:    psrlw $12, %xmm0
  ; CHECK-NEXT:    pxor %xmm1, %xmm0
-; CHECK-NEXT:    psrlw $7, %xmm0
  ; CHECK-NEXT:    retq
    %sh0 = lshr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
    %r = xor <8 x i16> %y, %sh0
@@ -89,17 +89,17 @@ define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %y) nounwind {
  define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %y) nounwind {
  ; CHECK-LABEL: ashr_and:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    psrlw $3, %xmm0
+; CHECK-NEXT:    psrlw $2, %xmm1
+; CHECK-NEXT:    pand {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; CHECK-NEXT:    pxor %xmm2, %xmm1
+; CHECK-NEXT:    psubb %xmm2, %xmm1
+; CHECK-NEXT:    psrlw $5, %xmm0
  ; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
  ; CHECK-NEXT:    pxor %xmm2, %xmm0
  ; CHECK-NEXT:    psubb %xmm2, %xmm0
  ; CHECK-NEXT:    pand %xmm1, %xmm0
-; CHECK-NEXT:    psrlw $2, %xmm0
-; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; CHECK-NEXT:    pxor %xmm1, %xmm0
-; CHECK-NEXT:    psubb %xmm1, %xmm0
  ; CHECK-NEXT:    retq
    %sh0 = ashr <16 x i8> %x, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    %r = and <16 x i8> %y, %sh0
@@ -110,19 +110,19 @@ define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %y) nounwind {
  define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) nounwind {
  ; CHECK-LABEL: ashr_or:
  ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-NEXT:    psrad $7, %xmm2
+; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-NEXT:    psrlq $7, %xmm1
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
  ; CHECK-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-NEXT:    psrad $5, %xmm2
+; CHECK-NEXT:    psrad $12, %xmm2
  ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; CHECK-NEXT:    psrlq $5, %xmm0
+; CHECK-NEXT:    psrlq $12, %xmm0
  ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
  ; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
  ; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    psrad $7, %xmm1
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; CHECK-NEXT:    psrlq $7, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
  ; CHECK-NEXT:    retq
    %sh0 = ashr <2 x i64> %x, <i64 5, i64 5>
    %r = or <2 x i64> %sh0, %y
@@ -134,9 +134,9 @@ define i32 @ashr_xor(i32 %x, i32 %y) nounwind {
  ; CHECK-LABEL: ashr_xor:
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    sarl $5, %eax
+; CHECK-NEXT:    sarl $7, %esi
+; CHECK-NEXT:    sarl $12, %eax
  ; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    sarl $7, %eax
  ; CHECK-NEXT:    retq
    %sh0 = ashr i32 %x, 5
    %r = xor i32 %y, %sh0
author	Sanjay Patel <spatel@rotateright.com>
	Sun, 1 Sep 2019 18:38:15 +0000 (18:38 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Sun, 1 Sep 2019 18:38:15 +0000 (18:38 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/AArch64/bitfield-insert.ll		patch \| blob \| history
test/CodeGen/AArch64/shift-logic.ll		patch \| blob \| history
test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll		patch \| blob \| history
test/CodeGen/X86/shift-logic.ll		patch \| blob \| history