return SDValue();
EVT VT = Mst->getValue().getValueType();
+ EVT StVT = Mst->getMemoryVT();
+ SDLoc dl(Mst);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
if (!Mst->isTruncatingStore()) {
if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
return ScalarStore;
// simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
return SDValue(N, 0);
// pattern above, but that pattern will be different. It will either need to
// match setcc more generally or match PCMPGTM later (in tablegen?).
+ SDValue Value = Mst->getValue();
+ if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ Mst->getMemoryVT())) {
+ return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
+ Mst->getBasePtr(), Mask,
+ Mst->getMemoryVT(), Mst->getMemOperand(), true);
+ }
+
return SDValue();
}
// Resolve truncating stores.
unsigned NumElems = VT.getVectorNumElements();
- EVT StVT = Mst->getMemoryVT();
- SDLoc dl(Mst);
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getScalarSizeInBits();
unsigned ToSz = StVT.getScalarSizeInBits();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
; AVX512F-LABEL: truncstore_v8i64_v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $8, %k0, %k1
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: truncstore_v8i64_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512VL-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512VL-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1}
+; AVX512VL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: truncstore_v8i64_v8i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kshiftlw $8, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $8, %k0, %k1
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v8i64_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kshiftld $24, %k0, %k0
-; AVX512BW-NEXT: kshiftrd $24, %k0, %k1
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v8i64_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v8i64_v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
-; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v8i64_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512VL-LABEL: truncstore_v4i64_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512VL-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1}
+; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v4i64_v4i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v4i64_v4i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v16i32_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v16i32_v16i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BWVL-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v16i32_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v16i32_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v8i32_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovdw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v8i32_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v32i16_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kmovd %k0, %k1
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v32i16_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BWVL-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <32 x i8> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v16i16_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i8> %mask, zeroinitializer
; AVX512F-LABEL: truncstore_v8i64_v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $8, %k0, %k1
-; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0
-; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: truncstore_v8i64_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512VL-NEXT: vpmovsqd %zmm0, %ymm0
-; AVX512VL-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1}
+; AVX512VL-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512VL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: truncstore_v8i64_v8i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kshiftlw $8, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $8, %k0, %k1
-; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v8i64_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kshiftld $24, %k0, %k0
-; AVX512BW-NEXT: kshiftrd $24, %k0, %k1
-; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0
-; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v8i64_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovsqw %zmm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v8i64_v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
-; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512VL-LABEL: truncstore_v4i64_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512VL-NEXT: vpmovsqd %ymm0, %xmm0
-; AVX512VL-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1}
+; AVX512VL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v16i32_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BW-NEXT: vpmovsdw %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v16i32_v16i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpmovsdw %zmm0, %ymm0
-; AVX512BWVL-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v16i32_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v16i32_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpmovsdb %zmm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v8i32_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovsdw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovdw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v32i16_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kmovd %k0, %k1
-; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v32i16_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovswb %zmm0, %ymm0
-; AVX512BWVL-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <32 x i8> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v16i16_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpmovswb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i8> %mask, zeroinitializer
; AVX512F-LABEL: truncstore_v8i64_v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $8, %k0, %k1
-; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
-; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: truncstore_v8i64_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512VL-NEXT: vpmovusqd %zmm0, %ymm0
-; AVX512VL-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1}
+; AVX512VL-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512VL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: truncstore_v8i64_v8i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kshiftlw $8, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $8, %k0, %k1
-; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v8i64_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kshiftld $24, %k0, %k0
-; AVX512BW-NEXT: kshiftrd $24, %k0, %k1
-; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
-; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v8i64_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovusqw %zmm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v8i64_v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
-; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512VL-LABEL: truncstore_v4i64_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512VL-NEXT: vpmovusqd %ymm0, %xmm0
-; AVX512VL-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1}
+; AVX512VL-NEXT: vpminuq {{.*}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v16i32_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v16i32_v16i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpmovusdw %zmm0, %ymm0
-; AVX512BWVL-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v16i32_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v16i32_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpmovusdb %zmm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v8i32_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovusdw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovdw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
; AVX512BW-LABEL: truncstore_v32i16_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: kmovd %k0, %k1
-; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k1
+; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: truncstore_v32i16_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpmovuswb %zmm0, %ymm0
-; AVX512BWVL-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminuw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <32 x i8> %mask, zeroinitializer
; AVX512BWVL-LABEL: truncstore_v16i16_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpmovuswb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i8> %mask, zeroinitializer