    return true;
  }
  case X86ISD::PINSRW: {
-    // Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern.
-    // TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc.
    SDValue InVec = N.getOperand(0);
    SDValue InScl = N.getOperand(1);
    uint64_t InIdx = N.getConstantOperandVal(2);
    assert(InIdx < NumElts && "Illegal insertion index");
+
+    // Attempt to recognise a PINSRW(VEC, 0, Idx) shuffle pattern.
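+    // e.g. A v8i16 PINSRW(VEC, 0, 2) produces Ops = { VEC } and
+    // Mask = { 0, 1, SM_SentinelZero, 3, 4, 5, 6, 7 }.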
+    if (X86::isZeroNode(InScl)) {
+      Ops.push_back(InVec);
+      for (unsigned i = 0; i != NumElts; ++i)
+        Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
+      return true;
+    }
+
+    // Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern.
+    // TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc.
    if (InScl.getOpcode() != ISD::AssertZext ||
        InScl.getOperand(0).getOpcode() != X86ISD::PEXTRW)
      return false;
  return SDValue();
}
+static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const X86Subtarget &Subtarget) {
+  unsigned Opcode = N->getOpcode();
+  assert(((X86ISD::PINSRB == Opcode && N->getValueType(0) == MVT::v16i8) ||
+          (X86ISD::PINSRW == Opcode && N->getValueType(0) == MVT::v8i16)) &&
+         "Unexpected vector insertion");
+
+  // Attempt to combine PINSRB/PINSRW patterns to a shuffle.
+  SDValue Op(N, 0);
+  SmallVector<int, 1> NonceMask; // Just a placeholder.
+  NonceMask.push_back(0);
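+  // The single-element mask is just a seed; combineX86ShufflesRecursively
+  // derives the real shuffle mask from the PINSRB/PINSRW operands.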
+  combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+                                /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+                                DCI, Subtarget);
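+  // On success the recursion replaces N itself via DCI.CombineTo, so there
+  // is never a new node to return from here.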
+  return SDValue();
+}
+
/// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs
/// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for
/// OR -> CMPNEQSS.
  case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget);
  case X86ISD::VSEXT:
  case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
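+  // PINSRB/PINSRW insertions can often be re-expressed as a target shuffle.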
+  case X86ISD::PINSRB:
+  case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
  case X86ISD::SHUFP: // Handle all target specific shuffles
  case X86ISD::INSERTPS:
  case X86ISD::PALIGNR:
;
; AVX-LABEL: _clearupper8xi16a:
; AVX: # BB#0:
-; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32b:
; SSE: # BB#0:
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: pinsrw $1, %eax, %xmm0
-; SSE-NEXT: pinsrw $3, %eax, %xmm0
-; SSE-NEXT: pinsrw $5, %eax, %xmm0
-; SSE-NEXT: pinsrw $7, %eax, %xmm0
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
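+; NOTE: the old pinsrw-of-zero chain is now recognised as a zeroable shuffle
+; and lowered to a single masked AND.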
;
; AVX-LABEL: _clearupper4xi32b: