From: Guy Blank Date: Wed, 9 Aug 2017 17:21:01 +0000 (+0000) Subject: [X86][AVX512] Choose correct registers in vpbroadcastb/w X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d82d9c3fbeeff7105981cd94b08b9d5b88821be3;p=llvm [X86][AVX512] Choose correct registers in vpbroadcastb/w Fixes the vpbroadcastb/w instructions which use GPRs as source operands, to use the correct registers. The full GPR should be used, and not the subregister, as it happens before the patch. Fixes pr33795 Differential Revision: https://reviews.llvm.org/D36479 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310498 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 1a5aff7e295..65af84c9513 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -978,6 +978,44 @@ multiclass avx512_int_broadcast_reg opc, X86VectorVTInfo _, (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX; } +multiclass avx512_int_broadcastbw_reg opc, string Name, + X86VectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg> { + let ExeDomain = _.ExeDomain in + defm r : AVX512_maskable_custom, T8PD, EVEX; + + def : Pat <(_.VT (OpNode SrcRC:$src)), + (!cast(Name#r) + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), + (!cast(Name#rk) _.RC:$src0, _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), + (!cast(Name#rkz) _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; +} + +multiclass avx512_int_broadcastbw_reg_vl opc, string Name, + AVX512VLVectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_int_broadcastbw_reg, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_int_broadcastbw_reg, EVEX_V256; + defm Z128 : avx512_int_broadcastbw_reg, EVEX_V128; + } +} + multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, Predicate prd> { @@ -989,18 +1027,11 @@ multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, } } -let isCodeGenOnly = 1 in { -defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - X86VBroadcast, GR8, HasBWI>; -defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - X86VBroadcast, GR16, HasBWI>; -} -let isAsmParserOnly = 1 in { - defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - null_frag, GR32, HasBWI>; - defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - null_frag, GR32, HasBWI>; -} +defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", + avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; +defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", + avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, + HasBWI>; defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, X86VBroadcast, GR32, HasAVX512>; defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index c5876ef6220..47bcc08dbb0 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1921,9 +1921,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: kmovq %rsi, %k1 -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1} -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z} -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2 +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm1 {%k1} {z} +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm0 {%k1} +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm2 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq @@ -1934,9 +1934,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z} -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1} -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2 +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm1 {%k1} {z} +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm2 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl @@ -1954,20 +1954,20 @@ define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1} -; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z} -; AVX512BW-NEXT: vpbroadcastw %di, %zmm2 +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm1 {%k1} {z} +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm0 {%k1} +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm2 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1} -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z} -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2 +; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm1 {%k1} {z} +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm0 {%k1} +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm2 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index c3ba6f106e6..9ceb3e5931a 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2799,9 +2799,9 @@ define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] -; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] -; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] +; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] +; CHECK-NEXT: vpbroadcastb %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] ; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2819,9 +2819,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] -; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] -; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] +; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] +; CHECK-NEXT: vpbroadcastb %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2839,9 +2839,9 @@ define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] -; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] -; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] +; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] +; CHECK-NEXT: vpbroadcastw %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2859,9 +2859,9 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] -; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] -; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] +; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] +; CHECK-NEXT: vpbroadcastw %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 67f0b48ecad..977066c9290 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1635,7 +1635,7 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 @@ -1688,7 +1688,7 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll index c03b9d1472c..1cf8453fc6a 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2274,7 +2274,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) { ; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -2390,7 +2390,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -2443,7 +2443,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll index ec760fbd809..363b3c26b51 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -4009,7 +4009,7 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) { ; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %ymm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll index 0bca815e622..410afd91713 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -2431,7 +2431,7 @@ define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(i8* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %ymm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll index 7d60907b934..02955df86c8 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -228,7 +228,7 @@ define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) { ; SKX-LABEL: insert_dup_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movl (%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -249,7 +249,7 @@ define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) { ; SKX-LABEL: insert_dup_mem_v32i16_sext_i16: ; SKX: ## BB#0: ; SKX-NEXT: movswl (%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -269,7 +269,7 @@ define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(i32* %ptr) #0 { ; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movzwl 2(%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -288,7 +288,7 @@ define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 { ; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movzwl 2(%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1 diff --git a/test/CodeGen/X86/vector-shuffle-512-v64.ll b/test/CodeGen/X86/vector-shuffle-512-v64.ll index b2fe8ba9092..836ebbc18f8 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -330,7 +330,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) { ; AVX512BW: # BB#0: ; AVX512BW-NEXT: movsbl (%rdi), %eax ; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: vpbroadcastb %al, %zmm0 +; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: @@ -346,7 +346,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) { ; AVX512VBMI: # BB#0: ; AVX512VBMI-NEXT: movsbl (%rdi), %eax ; AVX512VBMI-NEXT: shrl $8, %eax -; AVX512VBMI-NEXT: vpbroadcastb %al, %zmm0 +; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0 ; AVX512VBMI-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32