; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s
-declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
-declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
-
-declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
-declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
-
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK: ## %bb.0:
%1 = bitcast i16 %mask to <16 x i1>
%x = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> %1, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %stbuf, <16 x i1> %1, <16 x i32> %ind2, <16 x float> %x, i32 4)
ret void
}
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %stbuf, <8 x i1> %1, <8 x i32> %ind2, <8 x double> %x, i32 4)
ret void
}
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x float> %x, i32 4)
ret void
}
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x double> %x, i32 4)
ret void
}
;;
;; Integer Gather/Scatter
;;
-declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
-declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
-
-declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
-declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
%1 = bitcast i16 %mask to <16 x i1>
%x = call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32> %src, i8* %base, <16 x i32> %ind, <16 x i1> %1, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpi.512(i8* %stbuf, <16 x i1> %1, <16 x i32> %ind2, <16 x i32> %x, i32 4)
ret void
}
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpi.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x i32> %x, i32 4)
ret void
}
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpq.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x i64> %x, i32 4)
ret void
}
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64> %src, i8* %base, <8 x i32> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpq.512(i8* %stbuf, <8 x i1> %1, <8 x i32> %ind2, <8 x i64> %x, i32 4)
ret void
}
; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i8 %mask to <8 x i1>
%x = load <8 x double>, <8 x double>* %src, align 64
- call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %stbuf, <8 x i1> %1, <8 x i32>%ind, <8 x double> %x, i32 4)
ret void
}
; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i8 %mask to <8 x i1>
%x = load <8 x double>, <8 x double>* %src, align 64
- call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %stbuf, <8 x i1> %1, <8 x i64>%ind, <8 x double> %x, i32 4)
ret void
}
; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i16 %mask to <16 x i1>
%x = load <16 x float>, <16 x float>* %src, align 64
- call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %stbuf, <16 x i1> %1, <16 x i32>%ind, <16 x float> %x, i32 4)
ret void
}
; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i8 %mask to <8 x i1>
%x = load <8 x float>, <8 x float>* %src, align 32
- call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> %1, <8 x i64>%ind, <8 x float> %x, i32 4)
ret void
}
; CHECK-NEXT: retq
%x = call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> %ind2, <8 x float> %x, i32 4)
ret void
}
ret <8 x i32> %res2
}
-declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)
-
define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK: ## %bb.0:
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <2 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <2 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)
-
define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK: ## %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <2 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <2 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK: ## %bb.0:
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK: ## %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <4 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <4 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)
-
define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK: ## %bb.0:
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)
-
define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)
-
define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK: ## %bb.0:
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %x0, <2 x i1> <i1 true, i1 true>, <4 x i32> %x2, <2 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %x0, <2 x i1> %2, <4 x i32> %x2, <2 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)
-
define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %x0, <2 x i1> <i1 true, i1 true>, <4 x i32> %x2, <2 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %x0, <2 x i1> %2, <4 x i32> %x2, <2 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)
-
define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK: ## %bb.0:
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)
-
define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)
-
define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK: ## %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)
-
define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK: ## %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x i32> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)
-
define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK: ## %bb.0:
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %x0, <8 x i1> %1, <8 x i32> %x2, <8 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)
-
define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> %1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> zeroinitializer, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> bitcast (<1 x i8> <i8 96> to <8 x i1>), <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}
declare <4 x i32> @llvm.x86.avx512.mask.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, <4 x i1>, i32)
declare <8 x float> @llvm.x86.avx512.mask.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.x86.avx512.mask.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, <8 x i1>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dps.512(i8*, <16 x i1>, <16 x i32>, <16 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dpd.512(i8*, <8 x i1>, <8 x i32>, <8 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qps.512(i8*, <8 x i1>, <8 x i64>, <8 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qpd.512(i8*, <8 x i1>, <8 x i64>, <8 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dpi.512(i8*, <16 x i1>, <16 x i32>, <16 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dpq.512(i8*, <8 x i1>, <8 x i32>, <8 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qpi.512(i8*, <8 x i1>, <8 x i64>, <8 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qpq.512(i8*, <8 x i1>, <8 x i64>, <8 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv2.df(i8*, <2 x i1>, <2 x i64>, <2 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv2.di(i8*, <2 x i1>, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.df(i8*, <4 x i1>, <4 x i64>, <4 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.di(i8*, <4 x i1>, <4 x i64>, <4 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.sf(i8*, <2 x i1>, <2 x i64>, <4 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.si(i8*, <2 x i1>, <2 x i64>, <4 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv8.sf(i8*, <4 x i1>, <4 x i64>, <4 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv8.si(i8*, <4 x i1>, <4 x i64>, <4 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv2.df(i8*, <2 x i1>, <4 x i32>, <2 x double>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv2.di(i8*, <2 x i1>, <4 x i32>, <2 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.df(i8*, <4 x i1>, <4 x i32>, <4 x double>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.di(i8*, <4 x i1>, <4 x i32>, <4 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.sf(i8*, <4 x i1>, <4 x i32>, <4 x float>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.si(i8*, <4 x i1>, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv8.sf(i8*, <8 x i1>, <8 x i32>, <8 x float>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv8.si(i8*, <8 x i1>, <8 x i32>, <8 x i32>, i32)
+