// Shorten the way if the mask is a vector of constants.
if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt =
Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
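The cast change above matters because an all-zeroes mask written as 'zeroinitializer' is
represented in LLVM IR as ConstantAggregateZero, not ConstantVector, so the old
cast<ConstantVector> would assert on exactly the case the zero-mask scatter and masked-store
tests added below exercise. cast<Constant> plus getAggregateElement() handles ConstantVector,
ConstantDataVector and ConstantAggregateZero alike. A minimal standalone sketch of that
representation (illustrative only, not part of the patch; function and variable names are
made up, and API spellings such as VectorType::get vary a little across LLVM versions):

  #include <cassert>
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  // Builds a <2 x i1> zeroinitializer mask and probes how it is represented.
  static void zeroMaskRepresentation() {
    LLVMContext Ctx;
    Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx), 2);
    Constant *Mask = Constant::getNullValue(MaskTy);   // 'zeroinitializer'
    assert(isa<ConstantAggregateZero>(Mask));          // not a ConstantVector...
    assert(!isa<ConstantVector>(Mask));                // ...so cast<ConstantVector> asserts
    // The fixed code path: getAggregateElement works on any aggregate Constant.
    Constant *Elt0 = cast<Constant>(Mask)->getAggregateElement(0u);
    assert(Elt0 && Elt0->isNullValue());
  }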
%res3 = fadd <16 x float> %res2, %res
ret <16 x float>%res3
}
+
+define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
+; KNL_64-LABEL: zero_mask:
+; KNL_64: # %bb.0:
+; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT: kxorw %k0, %k0, %k1
+; KNL_64-NEXT: vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_64-NEXT: vzeroupper
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: zero_mask:
+; KNL_32: # %bb.0:
+; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1
+; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1
+; KNL_32-NEXT: kxorw %k0, %k0, %k1
+; KNL_32-NEXT: vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_32-NEXT: vzeroupper
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: zero_mask:
+; SKX: # %bb.0:
+; SKX-NEXT: kxorw %k0, %k0, %k1
+; SKX-NEXT: vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: zero_mask:
+; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpsllq $32, %xmm1, %xmm1
+; SKX_32-NEXT: vpsraq $32, %xmm1, %xmm1
+; SKX_32-NEXT: kxorw %k0, %k0, %k1
+; SKX_32-NEXT: vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
+ ret void
+}
ret void
}
+define void @zero_mask(<2 x double>* %addr, <2 x double> %val) {
+; SSE-LABEL: zero_mask:
+; SSE: ## %bb.0:
+; SSE-NEXT: retq
+;
+; AVX1OR2-LABEL: zero_mask:
+; AVX1OR2: ## %bb.0:
+; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vmaskmovpd %xmm0, %xmm1, (%rdi)
+; AVX1OR2-NEXT: retq
+;
+; AVX512F-LABEL: zero_mask:
+; AVX512F: ## %bb.0:
+; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: kxorw %k0, %k0, %k1
+; AVX512F-NEXT: vmovupd %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: zero_mask:
+; AVX512VL: ## %bb.0:
+; AVX512VL-NEXT: kxorw %k0, %k0, %k1
+; AVX512VL-NEXT: vmovupd %xmm0, (%rdi) {%k1}
+; AVX512VL-NEXT: retq
+ call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %addr, i32 4, <2 x i1> zeroinitializer)
+ ret void
+}
+
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)