From: Craig Topper
Date: Sun, 2 Jun 2019 22:52:34 +0000 (+0000)
Subject: [X86] Add test cases for masked store and masked scatter with an all zeroes mask...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=976880462881361a16b7649dd7c42dba66d57da0;p=llvm

[X86] Add test cases for masked store and masked scatter with an all zeroes mask. Fix bug in ScalarizeMaskedMemIntrin

We need to cast only to Constant instead of ConstantVector to allow ConstantAggregateZero, which is how an all-zeroes mask is represented.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362341 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index e2ee9f28f3b..7776dffb4e9 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -488,7 +488,7 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
   // Shorten the way if the mask is a vector of constants.
   if (isConstantIntVector(Mask)) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-      if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
         continue;
       Value *OneElt =
           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index 8ee23d6feff..2d6b19b334e 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2964,3 +2964,40 @@ define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %f
   %res3 = fadd <16 x float> %res2, %res
   ret <16 x float>%res3
 }
+
+define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
+; KNL_64-LABEL: zero_mask:
+; KNL_64:       # %bb.0:
+; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT:    kxorw %k0, %k0, %k1
+; KNL_64-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_64-NEXT:    vzeroupper
+; KNL_64-NEXT:    retq
+;
+; KNL_32-LABEL: zero_mask:
+; KNL_32:       # %bb.0:
+; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT:    vpsllq $32, %xmm1, %xmm1
+; KNL_32-NEXT:    vpsraq $32, %zmm1, %zmm1
+; KNL_32-NEXT:    kxorw %k0, %k0, %k1
+; KNL_32-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_32-NEXT:    vzeroupper
+; KNL_32-NEXT:    retl
+;
+; SKX-LABEL: zero_mask:
+; SKX:       # %bb.0:
+; SKX-NEXT:    kxorw %k0, %k0, %k1
+; SKX-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX-NEXT:    retq
+;
+; SKX_32-LABEL: zero_mask:
+; SKX_32:       # %bb.0:
+; SKX_32-NEXT:    vpsllq $32, %xmm1, %xmm1
+; SKX_32-NEXT:    vpsraq $32, %xmm1, %xmm1
+; SKX_32-NEXT:    kxorw %k0, %k0, %k1
+; SKX_32-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX_32-NEXT:    retl
+  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
+  ret void
+}
diff --git a/test/CodeGen/X86/masked_store.ll b/test/CodeGen/X86/masked_store.ll
index efbb1ef8cc6..180197ccbcf 100644
--- a/test/CodeGen/X86/masked_store.ll
+++ b/test/CodeGen/X86/masked_store.ll
@@ -5505,6 +5505,34 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
   ret void
 }
 
+define void @zero_mask(<2 x double>* %addr, <2 x double> %val) {
+; SSE-LABEL: zero_mask:
+; SSE:       ## %bb.0:
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: zero_mask:
+; AVX1OR2:       ## %bb.0:
+; AVX1OR2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vmaskmovpd %xmm0, %xmm1, (%rdi)
+; AVX1OR2-NEXT:    retq
+;
+; AVX512F-LABEL: zero_mask:
+; AVX512F:       ## %bb.0:
+; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    kxorw %k0, %k0, %k1
+; AVX512F-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: zero_mask:
+; AVX512VL:       ## %bb.0:
+; AVX512VL-NEXT:    kxorw %k0, %k0, %k1
+; AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) {%k1}
+; AVX512VL-NEXT:    retq
+  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %addr, i32 4, <2 x i1> zeroinitializer)
+  ret void
+}
+
 declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
 declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
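
Why the cast matters: an all-zeroes vector constant such as <2 x i1> zeroinitializer is represented in the LLVM C++ API as ConstantAggregateZero, which does not derive from ConstantVector, so cast<ConstantVector>(Mask) trips the assertion inside cast<> at run time. Casting to the common base class Constant accepts both representations, and getAggregateElement() is declared on Constant. Below is a minimal standalone sketch of the fixed check; the helper name maskLaneIsZero is hypothetical, while cast<>, Constant::getAggregateElement(), and Constant::isNullValue() are the LLVM APIs the patch actually uses.

#include "llvm/IR/Constants.h"    // Constant, ConstantAggregateZero
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h" // cast<>

using namespace llvm;

// Hypothetical helper mirroring the fixed line in scalarizeMaskedScatter:
// returns true when lane Idx of a constant mask is known to be zero.
static bool maskLaneIsZero(Value *Mask, unsigned Idx) {
  // An all-zeroes mask reaches here as ConstantAggregateZero, not as a
  // ConstantVector, so cast<ConstantVector>(Mask) would assert. The common
  // base class Constant covers both, and getAggregateElement() is declared
  // on Constant.
  return cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue();
}

In the pass this code path is only reached after isConstantIntVector(Mask) has succeeded, so Mask is guaranteed to be some kind of Constant and the widened cast cannot fail.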