From 976880462881361a16b7649dd7c42dba66d57da0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 2 Jun 2019 22:52:34 +0000 Subject: [PATCH] [X86] Add test cases for masked store and masked scatter with an all zeroes mask. Fix bug in ScalarizeMaskedMemIntrin Need to cast only to Constant instead of ConstantVector to allow ConstantAggregateZero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362341 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 2 +- test/CodeGen/X86/masked_gather_scatter.ll | 37 +++++++++++++++++++++++ test/CodeGen/X86/masked_store.ll | 28 +++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index e2ee9f28f3b..7776dffb4e9 100644 --- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -488,7 +488,7 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { // Shorten the way if the mask is a vector of constants. 
if (isConstantIntVector(Mask)) { for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue()) + if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) continue; Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index 8ee23d6feff..2d6b19b334e 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -2964,3 +2964,40 @@ define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %f %res3 = fadd <16 x float> %res2, %res ret <16 x float>%res3 } + +define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) { +; KNL_64-LABEL: zero_mask: +; KNL_64: # %bb.0: +; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; KNL_64-NEXT: kxorw %k0, %k0, %k1 +; KNL_64-NEXT: vscatterqpd %zmm0, (,%zmm1) {%k1} +; KNL_64-NEXT: vzeroupper +; KNL_64-NEXT: retq +; +; KNL_32-LABEL: zero_mask: +; KNL_32: # %bb.0: +; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 +; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 +; KNL_32-NEXT: kxorw %k0, %k0, %k1 +; KNL_32-NEXT: vscatterqpd %zmm0, (,%zmm1) {%k1} +; KNL_32-NEXT: vzeroupper +; KNL_32-NEXT: retl +; +; SKX-LABEL: zero_mask: +; SKX: # %bb.0: +; SKX-NEXT: kxorw %k0, %k0, %k1 +; SKX-NEXT: vscatterqpd %xmm0, (,%xmm1) {%k1} +; SKX-NEXT: retq +; +; SKX_32-LABEL: zero_mask: +; SKX_32: # %bb.0: +; SKX_32-NEXT: vpsllq $32, %xmm1, %xmm1 +; SKX_32-NEXT: vpsraq $32, %xmm1, %xmm1 +; SKX_32-NEXT: kxorw %k0, %k0, %k1 +; SKX_32-NEXT: vscatterqpd %xmm0, (,%xmm1) {%k1} +; SKX_32-NEXT: retl + call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer) + ret void +} diff --git a/test/CodeGen/X86/masked_store.ll b/test/CodeGen/X86/masked_store.ll index efbb1ef8cc6..180197ccbcf
100644 --- a/test/CodeGen/X86/masked_store.ll +++ b/test/CodeGen/X86/masked_store.ll @@ -5505,6 +5505,34 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) { ret void } +define void @zero_mask(<2 x double>* %addr, <2 x double> %val) { +; SSE-LABEL: zero_mask: +; SSE: ## %bb.0: +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: zero_mask: +; AVX1OR2: ## %bb.0: +; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vmaskmovpd %xmm0, %xmm1, (%rdi) +; AVX1OR2-NEXT: retq +; +; AVX512F-LABEL: zero_mask: +; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: kxorw %k0, %k0, %k1 +; AVX512F-NEXT: vmovupd %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: zero_mask: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: kxorw %k0, %k0, %k1 +; AVX512VL-NEXT: vmovupd %xmm0, (%rdi) {%k1} +; AVX512VL-NEXT: retq + call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %addr, i32 4, <2 x i1> zeroinitializer) + ret void +} + declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>) declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>) declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>) -- 2.40.0