return false;
}
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
- if (Subtarget.hasSSE2())
- // v16i16 -> v16i8
- if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
- return true;
- if (Subtarget.hasSSE41())
- // v8i32 -> v8i16
- if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
- return true;
- return false;
-}
-
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// Return the source value to be truncated or SDValue() if the pattern was not
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
return SDValue();
- SDValue USatVal = detectUSatPattern(In, VT);
- if (USatVal) {
+ if (auto USatVal = detectUSatPattern(In, VT))
if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
- if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
- return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
- }
- }
return SDValue();
}
ret <16 x i8> %B
}
-define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
-; CHECK-LABEL: usat_trunc_wb_256:
-; CHECK: # BB#0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
- %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
- %x6 = trunc <16 x i16> %x5 to <16 x i8>
- ret <16 x i8> %x6
-}
-define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
-; CHECK-LABEL: usat_trunc_dw_256:
-; CHECK: # BB#0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
- %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
- %x6 = trunc <8 x i32> %x5 to <8 x i16>
- ret <8 x i16> %x6
-}
define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
; KNL-LABEL: usat_trunc_wb_256_mem:
; KNL: ## BB#0:
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vmovdqu %xmm0, (%rdi)
; KNL-NEXT: retq
;
define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; KNL-LABEL: usat_trunc_wb_256:
; KNL: ## BB#0:
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: usat_trunc_wb_256:
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; This matter of this test is ensuring that vpackus* is not used for umin+trunc combination, since vpackus* input is a signed number.
+define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
+; CHECK-LABEL: usat_trunc_wb_256:
+; CHECK-NOT: vpackuswb %xmm1, %xmm0, %xmm0
+ %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x6 = trunc <16 x i16> %x5 to <16 x i8>
+ ret <16 x i8> %x6
+}
+
+define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
+; CHECK-LABEL: usat_trunc_dw_256:
+; CHECK-NOT: vpackusdw %xmm1, %xmm0, %xmm0
+ %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x6 = trunc <8 x i32> %x5 to <8 x i16>
+ ret <8 x i16> %x6
+}