[X86] Add a DAG combine to turn v16i16->v16i8 VTRUNCUS+store into a saturating truncating store.

author Craig Topper <craig.topper@intel.com>

Fri, 11 Oct 2019 04:16:49 +0000 (04:16 +0000)

committer Craig Topper <craig.topper@intel.com>

Fri, 11 Oct 2019 04:16:49 +0000 (04:16 +0000)
author Craig Topper <craig.topper@intel.com>
Fri, 11 Oct 2019 04:16:49 +0000 (04:16 +0000)
committer Craig Topper <craig.topper@intel.com>
Fri, 11 Oct 2019 04:16:49 +0000 (04:16 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 275e876644c9889a5488c472c0ee5ffa054d36c0..0e119415303263c90b8602ef12e9e04f9ce23bb0 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -40448,6 +40448,19 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                               MVT::v16i8, St->getMemOperand());
    }
  
+  // Try to fold a vpmovuswb 256->128 into a truncating store.
+  // FIXME: Generalize this to other types.
+  // FIXME: Do the same for signed saturation.
+  if (!St->isTruncatingStore() && VT == MVT::v16i8 &&
+      St->getValue().getOpcode() == X86ISD::VTRUNCUS &&
+      St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
+      TLI.isTruncStoreLegal(MVT::v16i16, MVT::v16i8) &&
+      St->getValue().hasOneUse()) {
+    return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+                           dl, St->getValue().getOperand(0), St->getBasePtr(),
+                           MVT::v16i8, St->getMemOperand(), DAG);
+  }
+
    // Optimize trunc store (of multiple scalars) to shuffle and store.
    // First, pack all of the elements in one place. Next, store to memory
    // in fewer chunks.
diff --git a/test/CodeGen/X86/min-legal-vector-width.ll b/test/CodeGen/X86/min-legal-vector-width.ll

index 95e6d3e7ae767f2cd10fe13c1db6baff16c9bc4f..eb90a2ae634dee597b622850e6e2eb3acabd0c1a 100644 (file)
--- a/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1104,8 +1104,7 @@ define void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8>* %q) "min
  ; CHECK-NEXT:    vmovdqa (%rdi), %ymm0
  ; CHECK-NEXT:    vpackusdw 32(%rdi), %ymm0, %ymm0
  ; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; CHECK-NEXT:    vpmovuswb %ymm0, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, (%rsi)
+; CHECK-NEXT:    vpmovuswb %ymm0, (%rsi)
  ; CHECK-NEXT:    vzeroupper
  ; CHECK-NEXT:    retq
    %a = load <16 x i32>, <16 x i32>* %p
author	Craig Topper <craig.topper@intel.com>
	Fri, 11 Oct 2019 04:16:49 +0000 (04:16 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Fri, 11 Oct 2019 04:16:49 +0000 (04:16 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/min-legal-vector-width.ll		patch \| blob \| history