; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB11_1
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $24, %k0, %k0
; AVX512BW-NEXT: kshiftrd $24, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB12_1
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
-; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $28, %k0, %k0
; AVX512BW-NEXT: kshiftrd $28, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
-; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kmovw %k0, %k1
-; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: vptestmw %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
-; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX1-NEXT: vpackssdw %xmm0, %xmm3, %xmm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: pmaddubsw_bad_extend:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa (%rdi), %xmm0
-; AVX2-NEXT: vmovdqa (%rsi), %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm3
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
-; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
-; AVX2-NEXT: vpmulld %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
-; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: pmaddubsw_bad_extend:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512-NEXT: vmovdqa (%rsi), %xmm1
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm3
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
-; AVX512-NEXT: vpshufb %xmm4, %xmm0, %xmm0
-; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm2
-; AVX512-NEXT: vpshufb %xmm4, %xmm1, %xmm1
-; AVX512-NEXT: vpmovsxbd %xmm3, %ymm3
-; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
-; AVX512-NEXT: vpmulld %ymm2, %ymm3, %ymm2
-; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX512-NEXT: vpmovsxbd %xmm1, %ymm1
-; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX256-LABEL: pmaddubsw_bad_extend:
+; AVX256: # %bb.0:
+; AVX256-NEXT: vmovdqa (%rdi), %xmm0
+; AVX256-NEXT: vmovdqa (%rsi), %xmm1
+; AVX256-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX256-NEXT: vpshufb %xmm2, %xmm0, %xmm3
+; AVX256-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; AVX256-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+; AVX256-NEXT: vpshufb %xmm2, %xmm1, %xmm2
+; AVX256-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+; AVX256-NEXT: vpmovsxbd %xmm3, %ymm3
+; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
+; AVX256-NEXT: vpmulld %ymm2, %ymm3, %ymm2
+; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX256-NEXT: vpmovsxbd %xmm1, %ymm1
+; AVX256-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX256-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX256-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX256-NEXT: vzeroupper
+; AVX256-NEXT: retq
%A = load <16 x i8>, <16 x i8>* %Aptr
%B = load <16 x i8>, <16 x i8>* %Bptr
%A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; AVX1-NEXT: vpackssdw %xmm0, %xmm3, %xmm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: pmaddubsw_bad_indices:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa (%rdi), %xmm0
-; AVX2-NEXT: vmovdqa (%rsi), %xmm1
-; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
-; AVX2-NEXT: vpmulld %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
-; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: pmaddubsw_bad_indices:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512-NEXT: vmovdqa (%rsi), %xmm1
-; AVX512-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u]
-; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u]
-; AVX512-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
-; AVX512-NEXT: vpmovsxbd %xmm2, %ymm2
-; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
-; AVX512-NEXT: vpmulld %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
-; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
-; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX256-LABEL: pmaddubsw_bad_indices:
+; AVX256: # %bb.0:
+; AVX256-NEXT: vmovdqa (%rdi), %xmm0
+; AVX256-NEXT: vmovdqa (%rsi), %xmm1
+; AVX256-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u]
+; AVX256-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u]
+; AVX256-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX256-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; AVX256-NEXT: vpmovsxbd %xmm2, %ymm2
+; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
+; AVX256-NEXT: vpmulld %ymm3, %ymm2, %ymm2
+; AVX256-NEXT: vpmovsxbd %xmm0, %ymm0
+; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX256-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX256-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX256-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX256-NEXT: vzeroupper
+; AVX256-NEXT: retq
%A = load <16 x i8>, <16 x i8>* %Aptr
%B = load <16 x i8>, <16 x i8>* %Bptr
%A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14> ;indices aren't all even
;
; AVX512F-LABEL: trunc_packus_v8i32_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
;
; AVX512BW-LABEL: trunc_packus_v8i32_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
;
; AVX512F-LABEL: trunc_packus_v8i32_v8i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
;
; AVX512BW-LABEL: trunc_packus_v8i32_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
;
; AVX512F-LABEL: trunc_packus_v8i32_v8i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v8i32_v8i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v16i16_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
-; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
-; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127]
-; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168]
-; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v16i16_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;