[DAGCombiner] Fix infinite loop in vector mul/shl combining

author John Brawn <john.brawn@arm.com>

Wed, 23 Nov 2016 16:05:51 +0000 (16:05 +0000)

committer John Brawn <john.brawn@arm.com>

Wed, 23 Nov 2016 16:05:51 +0000 (16:05 +0000)
author John Brawn <john.brawn@arm.com>
Wed, 23 Nov 2016 16:05:51 +0000 (16:05 +0000)
committer John Brawn <john.brawn@arm.com>
Wed, 23 Nov 2016 16:05:51 +0000 (16:05 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 4890441e37cdfdacd72d01f71d9b0d878e46dbe7..b3813ba82ce443e82d5ac31c3a60e222a430e3bd 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2157,11 +2157,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
  
    // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
    if (N0.getOpcode() == ISD::SHL &&
-      isConstantOrConstantVector(N1) &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
      SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
-    AddToWorklist(C3.getNode());
-    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+    if (isConstantOrConstantVector(C3))
+      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
    }
  
    // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
@@ -4714,8 +4714,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
        isConstantOrConstantVector(N1, /* No Opaques */ true) &&
        isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
-    AddToWorklist(Shl.getNode());
-    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
+    if (isConstantOrConstantVector(Shl))
+      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
    }
  
    if (N1C && !N1C->isOpaque())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index f2bb3795a2d848799602f9d845c9902de29563e5..71c83ae5c0f197300873721bb86ab230fb724255 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3498,25 +3498,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 4> Outputs;
    for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
-    ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
-    ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
-    if (!V1 || !V2) // Not a constant, bail.
-      return SDValue();
-
-    if (V1->isOpaque() || V2->isOpaque())
-      return SDValue();
+    SDValue V1 = BV1->getOperand(I);
+    SDValue V2 = BV2->getOperand(I);
  
      // Avoid BUILD_VECTOR nodes that perform implicit truncation.
-    // FIXME: This is valid and could be handled by truncating the APInts.
+    // FIXME: This is valid and could be handled by truncation.
      if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
        return SDValue();
  
      // Fold one vector element.
-    std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(),
-                                              V2->getAPIntValue());
-    if (!Folded.second)
+    SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
+
+    // Scalar folding only succeeded if the result is a constant or UNDEF.
+    if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
+        ScalarResult.getOpcode() != ISD::ConstantFP)
        return SDValue();
-    Outputs.push_back(getConstant(Folded.first, DL, SVT));
+    Outputs.push_back(ScalarResult);
    }
  
    assert(VT.getVectorNumElements() == Outputs.size() &&
diff --git a/test/CodeGen/AArch64/dag-combine-mul-shl.ll b/test/CodeGen/AArch64/dag-combine-mul-shl.ll

new file mode 100644 (file)

index 0000000..00c5005
--- /dev/null
+++ b/test/CodeGen/AArch64/dag-combine-mul-shl.ll
@@ -0,0 +1,117 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; CHECK-LABEL: fn1_vector:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn1_vector(<16 x i8> %arg) {
+entry:
+  %shl = shl <16 x i8> %arg, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  %mul = mul <16 x i8> %shl, <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  ret <16 x i8> %mul
+}
+
+; CHECK-LABEL: fn2_vector:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn2_vector(<16 x i8> %arg) {
+entry:
+  %mul = mul <16 x i8> %arg, <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  %shl = shl <16 x i8> %mul, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %shl
+}
+
+; CHECK-LABEL: fn1_vector_undef:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn1_vector_undef(<16 x i8> %arg) {
+entry:
+  %shl = shl <16 x i8> %arg, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  %mul = mul <16 x i8> %shl, <i8 undef, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  ret <16 x i8> %mul
+}
+
+; CHECK-LABEL: fn2_vector_undef:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn2_vector_undef(<16 x i8> %arg) {
+entry:
+  %mul = mul <16 x i8> %arg, <i8 undef, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  %shl = shl <16 x i8> %mul, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %shl
+}
+
+; CHECK-LABEL: fn1_scalar:
+; CHECK:      mov w[[REG:[0-9]+]], #1664
+; CHECK-NEXT: mul w0, w0, w[[REG]]
+; CHECK-NEXT: ret
+define i32 @fn1_scalar(i32 %arg) {
+entry:
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, 13
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar:
+; CHECK:      mov w[[REG:[0-9]+]], #1664
+; CHECK-NEXT: mul w0, w0, w[[REG]]
+; CHECK-NEXT: ret
+define i32 @fn2_scalar(i32 %arg) {
+entry:
+  %mul = mul i32 %arg, 13
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
+
+; CHECK-LABEL: fn1_scalar_undef:
+; CHECK:      mov w0
+; CHECK-NEXT: ret
+define i32 @fn1_scalar_undef(i32 %arg) {
+entry:
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, undef
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar_undef:
+; CHECK:      mov w0
+; CHECK-NEXT: ret
+define i32 @fn2_scalar_undef(i32 %arg) {
+entry:
+  %mul = mul i32 %arg, undef
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
+
+; CHECK-LABEL: fn1_scalar_opaque:
+; CHECK:      mov w[[REG:[0-9]+]], #13
+; CHECK-NEXT: mul w[[REG]], w0, w[[REG]]
+; CHECK-NEXT: lsl w0, w[[REG]], #7
+; CHECK-NEXT: ret
+define i32 @fn1_scalar_opaque(i32 %arg) {
+entry:
+  %bitcast = bitcast i32 13 to i32
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, %bitcast
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar_opaque:
+; CHECK:      mov w[[REG:[0-9]+]], #13
+; CHECK-NEXT: mul w[[REG]], w0, w[[REG]]
+; CHECK-NEXT: lsl w0, w[[REG]], #7
+; CHECK-NEXT: ret
+define i32 @fn2_scalar_opaque(i32 %arg) {
+entry:
+  %bitcast = bitcast i32 13 to i32
+  %mul = mul i32 %arg, %bitcast
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
diff --git a/test/CodeGen/X86/shift-pcmp.ll b/test/CodeGen/X86/shift-pcmp.ll

index 4945d6115dbe7c806a55148cfd8ae746fe5e8f33..adfd2f143d17da174ce6cd1220812682d7198b8d 100644 (file)
--- a/test/CodeGen/X86/shift-pcmp.ll
+++ b/test/CodeGen/X86/shift-pcmp.ll
@@ -26,15 +26,13 @@ define <8 x i16> @bar(<8 x i16> %a, <8 x i16> %b) {
  ; SSE-LABEL: bar:
  ; SSE:       # BB#0:
  ; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
-; SSE-NEXT:    psrlw $15, %xmm0
-; SSE-NEXT:    psllw $5, %xmm0
+; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: bar:
  ; AVX:       # BB#0:
  ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
-; AVX-NEXT:    vpsllw $5, %xmm0, %xmm0
+; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
  ; AVX-NEXT:    retq
  ;
    %icmp = icmp eq <8 x i16> %a, %b
author	John Brawn <john.brawn@arm.com>
	Wed, 23 Nov 2016 16:05:51 +0000 (16:05 +0000)
committer	John Brawn <john.brawn@arm.com>
	Wed, 23 Nov 2016 16:05:51 +0000 (16:05 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
test/CodeGen/AArch64/dag-combine-mul-shl.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/shift-pcmp.ll		patch \| blob \| history