; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1
-; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X64-XOP-NEXT: vpand %xmm3, %xmm1, %xmm1
; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0
; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0
-; X64-XOP-NEXT: vpand %xmm3, %xmm0, %xmm0
-; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_17:
; X64-XOP: # BB#0:
; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1
; X64-XOP-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
-; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; X64-XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0
; X64-XOP-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
-; X64-XOP-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31]
; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1
-; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X64-XOP-NEXT: vpand %xmm3, %xmm1, %xmm1
; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0
; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0
-; X64-XOP-NEXT: vpand %xmm3, %xmm0, %xmm0
-; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_31:
; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm1
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127]
; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1
-; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X64-XOP-NEXT: vpand %xmm3, %xmm1, %xmm1
; X64-XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-XOP-NEXT: vpmovsxbw %xmm0, %xmm0
; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0
-; X64-XOP-NEXT: vpand %xmm3, %xmm0, %xmm0
-; X64-XOP-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0,2,4,6,8,10,12,14],xmm0[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127: