From: Craig Topper Date: Tue, 21 Jun 2016 07:37:27 +0000 (+0000) Subject: [AVX512] Use update_llc_test_checks.py to regenerate a test in preparation for a... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=51ca6fa815de9f269ebf69920f2ea458693b9407;p=llvm [AVX512] Use update_llc_test_checks.py to regenerate a test in preparation for a future commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273252 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll index a59fe393f55..5a2ca4570e1 100644 --- a/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -1,4 +1,4 @@ - +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32) @@ -194,13 +194,15 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x f } declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vreducepd {{.*}}{%k1} -; CHECK: vreducepd -; CHECK: {sae} + define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vreducepd $8, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vreducepd $4, {sae}, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4) %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8) %res2 = fadd <8 x double> %res, %res1 @@ -208,14 +210,15 @@ define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 } declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vreduceps -; CHECK: {sae} -; CKECK: {%k1} -; CHECK: vreduceps + define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vreduceps $44, {sae}, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vreduceps $11, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8) %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4) %res2 = fadd <16 x float> %res, %res1 @@ -223,14 +226,15 @@ define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 } declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vrangepd -; CKECK: {%k1} -; CHECK: vrangepd -; CHECK: {sae} + define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4) %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8) %res2 = fadd <8 x double> %res, %res1 @@ -239,14 +243,14 @@ define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vrangeps -; CKECK: {%k1} -; CHECK: vrangeps -; CHECK: {sae} define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4) %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8) %res2 = fadd <16 x float> %res, %res1 @@ -255,14 +259,15 @@ define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vreducess -; CKECK: {%k1} -; CHECK: vreducess -; CHECK: {sae} define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vreducess $4, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vreducess $4, {sae}, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8) %res2 = fadd <4 x float> %res, %res1 @@ -270,15 +275,16 @@ define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x floa } declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vrangess -; CHECK: {sae} -; CKECK: {%k1} -; CHECK: vrangess -; CHECK: {sae} + define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_range_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vrangess $4, {sae}, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8) %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8) %res2 = fadd <4 x float> %res, %res1 @@ -287,14 +293,15 @@ define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vreducesd -; CKECK: {%k1} -; CHECK: vreducesd -; CHECK: {sae} define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8) %res2 = fadd <2 x double> %res, %res1 @@ -302,14 +309,16 @@ define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x do } declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32) -; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vrangesd -; CKECK: {%k1} -; CHECK: vrangesd -; CHECK: {sae} + define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_range_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8) %res2 = fadd <2 x double> %res, %res1 @@ -439,14 +448,16 @@ define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i6 declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8) -; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vfpclasspd -; CHECK: {%k1} -; CHECK: vfpclasspd -; CHECK: kmovb %k0 define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vfpclasspd $2, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %ecx +; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: addb %cl, %al +; CHECK-NEXT: retq %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1) %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1) %res2 = add i8 %res, %res1 @@ -454,14 +465,16 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) { } declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16) -; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vfpclassps -; CHECK: vfpclassps -; CHECK: {%k1} -; CHECK: kmov define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: retq %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1) %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1) %res2 = add i16 %res, %res1 @@ -470,14 +483,29 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) { declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8) -; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vfpclasssd -; CHECK: %k0 {%k1} -; CHECK: vfpclasssd -; CHECK: %k0 define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB28_2 +; CHECK-NEXT: ## BB#1: +; CHECK-NEXT: movb $-1, %al +; CHECK-NEXT: LBB28_2: +; CHECK-NEXT: vfpclasssd $4, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB28_4 +; CHECK-NEXT: ## BB#3: +; CHECK-NEXT: movb $-1, %cl +; CHECK-NEXT: LBB28_4: +; CHECK-NEXT: addb %cl, %al +; CHECK-NEXT: retq %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1) %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1) %res2 = add i8 %res, %res1 @@ -486,16 +514,29 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) { declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8) -; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss -; CHECK-NOT: call -; CHECK: kmovw -; CHECK: vfpclassss -; CHECK: %k0 -; CHECK: {%k1} -; CHECK: kmovw -; CHECK: vfpclassss -; CHECK: %k0 define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vfpclassss $4, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB29_2 +; CHECK-NEXT: ## BB#1: +; CHECK-NEXT: movb $-1, %al +; CHECK-NEXT: LBB29_2: +; CHECK-NEXT: vfpclassss $4, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB29_4 +; CHECK-NEXT: ## BB#3: +; CHECK-NEXT: movb $-1, %cl +; CHECK-NEXT: LBB29_4: +; CHECK-NEXT: addb %cl, %al +; CHECK-NEXT: retq %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1) %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1) %res2 = add i8 %res, %res1 @@ -594,12 +635,14 @@ declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512: -; CHECK: kmovw %edi, %k1 -; CHECK: vshuff32x4 $68, %zmm0, %zmm0, %zmm2 {%k1} {z} -; CHECK: vshuff32x4 $68, %zmm0, %zmm0, %zmm1 {%k1} -; CHECK: vshuff32x4 $68, %zmm0, %zmm0, %zmm0 -; CHECK: vaddps %zmm1, %zmm0, %zmm0 -; CHECK: vaddps %zmm0, %zmm2, %zmm0 +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1) %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask) @@ -613,12 +656,14 @@ declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512: -; CHECK: kmovb %edi, %k1 -; CHECK: vshuff64x2 $0, %zmm0, %zmm0, %zmm2 {%k1} {z} -; CHECK: vshuff64x2 $0, %zmm0, %zmm0, %zmm1 {%k1} -; CHECK: vshuff64x2 $0, %zmm0, %zmm0, %zmm0 -; CHECK: vaddpd %zmm1, %zmm0, %zmm0 -; CHECK: vaddpd %zmm0, %zmm2, %zmm0 +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1) %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask) @@ -632,12 +677,14 @@ declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512: -; CHECK: kmovw %edi, %k1 -; CHECK: vshufi32x4 $68, %zmm0, %zmm0, %zmm2 {%k1} {z} -; CHECK: vshufi32x4 $68, %zmm0, %zmm0, %zmm1 {%k1} -; CHECK: vshufi32x4 $68, %zmm0, %zmm0, %zmm0 -; CHECK: vpaddd %zmm1, %zmm0, %zmm0 -; CHECK: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1) %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) @@ -651,12 +698,14 @@ declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512: -; CHECK: kmovb %edi, %k1 -; CHECK: vshufi64x2 $0, %zmm0, %zmm0, %zmm2 {%k1} {z} -; CHECK: vshufi64x2 $0, %zmm0, %zmm0, %zmm1 {%k1} -; CHECK: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 -; CHECK: vpaddq %zmm1, %zmm0, %zmm0 -; CHECK: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1) %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)