define <16 x float> @test_mm512_mask_shuffle_f32x4(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_shuffle_f32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_shuffle_f32x4(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_shuffle_f32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_shuffle_i32x4(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X86-LABEL: test_mm512_mask_shuffle_i32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_shuffle_i32x4(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X86-LABEL: test_mm512_maskz_shuffle_i32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
define zeroext i16 @test_mm512_mask_testn_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: movzwl %ax, %eax
define zeroext i16 @test_mm512_mask_test_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-LABEL: test_mm512_mask_set1_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpbroadcastd %eax, %zmm0 {%k1}
; X86-NEXT: retl
;
; X86-LABEL: test_mm512_maskz_set1_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm512_mask_broadcastd_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastd_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
; X86-LABEL: test_mm512_mask_broadcastss_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastss_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_mask_movehdup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
; X86-LABEL: test_mm512_maskz_movehdup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_mask_moveldup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
; X86-LABEL: test_mm512_maskz_moveldup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_mask_permute_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
; X86-LABEL: test_mm512_maskz_permute_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_mask_shuffle_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_shuffle_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT: retl
;
define <2 x i64> @test_mm512_mask_cvtepi32_epi8(<2 x i64> %__O, i16 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_cvtepi32_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
define <2 x i64> @test_mm512_maskz_cvtepi32_epi8(i16 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_cvtepi32_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_ternarylogic_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
; X86-LABEL: test_mm512_mask_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_ternarylogic_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X86-LABEL: test_mm512_maskz_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask2_permutex2var_epi32(<8 x i64> %__A, <8 x i64> %__I, i16 zeroext %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2d %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT: vmovdqa64 %zmm1, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_mask2_permutex2var_ps(<16 x float> %__A, <8 x i64> %__I, i16 zeroext %__U, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT: vmovaps %zmm1, %zmm0
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_permutex2var_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_permutex2var_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_permutex2var_ps(<16 x float> %__A, i16 zeroext %__U, <8 x i64> %__I, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_permutex2var_ps(i16 zeroext %__U, <16 x float> %__A, <8 x i64> %__I, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_maskz_fmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_fmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fnmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_maskz_fnmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_fnmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_maskz_fmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) - zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_fmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fnmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) + zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_maskz_fnmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_fnmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmaddsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_maskz_fmaddsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmsubadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_fmsubadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmaddsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_maskz_fmaddsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fmsubadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_fmsubadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_mask3_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_mask3_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_mask3_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_mask_fnmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fnmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_fnmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
define <16 x float> @test_mm512_mask_fnmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask3_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
; X86-LABEL: test_mm512_mask_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1}
; X86-NEXT: retl
;
; X86-LABEL: test_mm512_maskz_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X86-LABEL: test_mm512_mask_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1}
; X86-NEXT: retl
;
; X86-LABEL: test_mm512_maskz_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define void @test_mm512_mask_compressstoreu_ps(i8* %__P, i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vcompressps %zmm0, (%eax) {%k1}
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vcompressps %zmm0, (%ecx) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
define void @test_mm512_mask_compressstoreu_epi32(i8* %__P, i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1}
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpcompressd %zmm0, (%ecx) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
define i32 @test_mm512_mask_reduce_add_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_add_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0
define i32 @test_mm512_mask_reduce_mul_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_mul_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
define i32 @test_mm512_mask_reduce_and_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_and_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
define i32 @test_mm512_mask_reduce_or_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_or_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X86-NEXT: vpor %ymm1, %ymm0, %ymm0
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_max_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_max_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_min_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_min_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
define <16 x float> @test_mm512_mask_max_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_max_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_max_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_max_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_max_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_max_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_max_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_max_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_min_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_min_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_min_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_min_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_min_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_min_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_min_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_min_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_sqrt_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_sqrt_ps(i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_maskz_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_mask_sqrt_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_sqrt_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps {rn-sae}, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <16 x float> @test_mm512_maskz_sqrt_round_ps(i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_maskz_sqrt_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps {rn-sae}, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_rol_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_rol_epi32(i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_rolv_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_rolv_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_ror_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_ror_epi32(i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_mask_rorv_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
define <8 x i64> @test_mm512_maskz_rorv_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2]
; X86-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2]
; X86-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2]
; X86-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2]
; X86-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2]
; X86-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2]
; X86-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2]
; X86-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2]
; X86-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2]
; X86-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2]
; X86-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2]
; X86-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2]
; X86-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]