[(set VR512:$dst, (v16i32 immAllZerosV))]>;
}
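+// 128-bit and 256-bit counterparts of AVX512_512_SET0 above. They are only
+// available with VLX and are expanded post-RA to the EVEX-encoded
+// VPXORDZ128rr/VPXORDZ256rr (see the instruction-expansion change below).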
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
+def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
+ [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
+def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
+ [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
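As a quick illustration of the intended effect (a hypothetical llc test, not part of this patch; the function name and RUN line are invented, following the style of the regenerated tests below): on a VLX target, a 128-bit zero vector should now be materialized through the new pseudo and come out as an EVEX vpxord.

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s
define <4 x i32> @zero_v4i32() {
; CHECK-LABEL: zero_v4i32:
; CHECK: vpxord %xmm0, %xmm0, %xmm0
; CHECK: retq
  ret <4 x i32> zeroinitializer
}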
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
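+  // The new VLX-only 128/256-bit zero pseudos mirror AVX512_512_SET0 below:
+  // they expand to the EVEX-encoded vpxord with both source reads undef.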
+ case X86::AVX512_128_SET0:
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZ128rr));
+ case X86::AVX512_256_SET0:
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
case X86::AVX512_512_SET0:
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
case X86::V_SETALLONES:
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [NoVLX], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
+let Predicates = [NoVLX] in
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [HasAVX, NoVLX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllZerosV))]>;
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512VL-NEXT: vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512VL-NEXT: vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
define <16 x i1> @test1() {
-; ALL_X64-LABEL: test1:
-; ALL_X64: ## BB#0:
-; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; ALL_X64-NEXT: retq
+; KNL-LABEL: test1:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; SKX-NEXT: retq
;
; KNL_X32-LABEL: test1:
; KNL_X32: ## BB#0:
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
-; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
-; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test44:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
;
; SKX-LABEL: test45:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xda]
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xca]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## encoding: [0xc5,0xd9,0xef,0xe4]
+; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4]
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
; CHECK-NEXT: vpaddb %xmm4, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc4]
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xda]
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca]
-; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## encoding: [0xc5,0xdd,0xef,0xe4]
+; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
; CHECK-NEXT: vpaddb %ymm4, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc4]
; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfc,0xc1]
; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xda]
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xca]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## encoding: [0xc5,0xd9,0xef,0xe4]
+; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4]
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
; CHECK-NEXT: vpaddb %xmm4, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc4]
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xda]
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca]
-; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## encoding: [0xc5,0xdd,0xef,0xe4]
+; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
; CHECK-NEXT: vpaddb %ymm4, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc4]
; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfc,0xc1]
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK: vpxor %xmm4, %xmm4, %xmm4
+; CHECK: vpxord %xmm4, %xmm4, %xmm4
; CHECK: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z}
; CHECK: vfixupimmpd $3, %xmm2, %xmm1, %xmm0
; CHECK: vaddpd %xmm4, %xmm3, %xmm1
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK: vpxor %xmm2, %xmm2, %xmm2
+; CHECK: vpxord %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK: vaddpd %xmm0, %xmm3, %xmm0
%res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4)
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1}
-; CHECK: vpxor %ymm4, %ymm4, %ymm4
+; CHECK: vpxord %ymm4, %ymm4, %ymm4
; CHECK: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z}
; CHECK: vfixupimmpd $3, %ymm2, %ymm1, %ymm0
; CHECK: vaddpd %ymm4, %ymm3, %ymm1
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z}
-; CHECK: vpxor %ymm4, %ymm4, %ymm4
+; CHECK: vpxord %ymm4, %ymm4, %ymm4
; CHECK: vmovaps %zmm0, %zmm5
; CHECK: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z}
; CHECK: vfixupimmpd $3, %ymm2, %ymm1, %ymm0
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm4
-; CHECK: vpxor %xmm2, %xmm2, %xmm2
+; CHECK: vpxord %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1}
; CHECK: vaddps %xmm0, %xmm3, %xmm0
; CHECK: vaddps %xmm4, %xmm0, %xmm0
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm4
-; CHECK: vpxor %xmm2, %xmm2, %xmm2
+; CHECK: vpxord %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK: vaddps %xmm0, %xmm3, %xmm0
; CHECK: vaddps %xmm4, %xmm0, %xmm0
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm4
-; CHECK: vpxor %ymm2, %ymm2, %ymm2
+; CHECK: vpxord %ymm2, %ymm2, %ymm2
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1}
; CHECK: vaddps %ymm0, %ymm3, %ymm0
; CHECK: vaddps %ymm4, %ymm0, %ymm0
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm4
-; CHECK: vpxor %ymm2, %ymm2, %ymm2
+; CHECK: vpxord %ymm2, %ymm2, %ymm2
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK: vaddps %ymm0, %ymm3, %ymm0
; CHECK: vaddps %ymm4, %ymm0, %ymm0
define <8 x float> @_ss8xfloat_mask(<8 x float> %i, float %a, <8 x i32> %mask1) {
; CHECK-LABEL: _ss8xfloat_mask:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; CHECK-NEXT: vpxord %ymm3, %ymm3, %ymm3
; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
; CHECK-NEXT: retq
define <8 x float> @_ss8xfloat_maskz(float %a, <8 x i32> %mask1) {
; CHECK-LABEL: _ss8xfloat_maskz:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @_ss4xfloat_mask(<4 x float> %i, float %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xfloat_mask:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vpxord %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpneqd %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
define <4 x float> @_ss4xfloat_maskz(float %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xfloat_maskz:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @_ss4xdouble_mask(<4 x double> %i, double %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xdouble_mask:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vpxord %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpneqd %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
; CHECK-NEXT: retq
define <4 x double> @_ss4xdouble_maskz(double %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xdouble_maskz:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
;
; AVX512-LABEL: test_v4f32_fneg_fmul:
; AVX512: # BB#0:
-; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpxord %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%m = fmul nsz <4 x float> %x, %y
;
; AVX512-LABEL: test_v4f64_fneg_fmul:
; AVX512: # BB#0:
-; AVX512-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpxord %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
%m = fmul nsz <4 x double> %x, %y
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx2 < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
;
; SKX-LABEL: test5:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test6:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vmovupd (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovapd (%rdi), %ymm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test10b:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
;
; SKX-LABEL: test11a:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovaps (%rdi), %ymm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
;
; SKX-LABEL: test12:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1}
; SKX-NEXT: retq
;
; SKX-LABEL: test14:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
;
; SKX-LABEL: test15:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
;
; SKX-LABEL: test16:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
;
; SKX-LABEL: test17:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
;
; SKX-LABEL: test18:
; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
define <4 x float> @load_one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
; AVX-LABEL: load_one_mask_bit_set2:
; AVX: ## BB#0:
-; AVX-NEXT: vinsertps $32, 8(%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: retq
;
; AVX512-LABEL: load_one_mask_bit_set2:
; AVX512: ## BB#0:
-; AVX512-NEXT: vinsertps $32, 8(%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX512-NEXT: retq
%res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x float> %val)
ret <4 x float> %res
; AVX-LABEL: load_one_mask_bit_set4:
; AVX: ## BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmovhpd 24(%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0]
+; AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: load_one_mask_bit_set4:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vmovhpd 24(%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0]
+; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: load_one_mask_bit_set4:
; SKX: ## BB#0:
; SKX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; SKX-NEXT: vmovhpd 24(%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0]
+; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; SKX-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
%res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
;
; AVX512VL-LABEL: shuffle_v2i64_z1:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v2f64_z1:
-; AVX: # BB#0:
-; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v2f64_z1:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2f64_z1:
+; AVX2: # BB#0:
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2f64_z1:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
ret <2 x double> %shuffle
}
;
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX512VL-NEXT: retq
%shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
;
; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT: retq
%bitcast32 = bitcast <2 x i64> %x to <4 x float>
;
; AVX512CDVL-LABEL: testv2i64u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vplzcntq %xmm0, %xmm0
;
; AVX512CDVL-LABEL: testv4i32:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm2
; AVX512CDVL-NEXT: vpandd %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubd {{.*}}(%rip){1to4}, %xmm0, %xmm0
;
; AVX512CDVL-LABEL: testv4i32u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpandd %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vplzcntd %xmm0, %xmm0
;
; AVX512CDVL-LABEL: testv8i16:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
;
; AVX512CDVL-LABEL: testv8i16u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
;
; AVX512CDVL-LABEL: testv16i8:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
;
; AVX512CDVL-LABEL: testv16i8u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
;
; AVX512CDVL-LABEL: testv4i64:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubq {{.*}}(%rip){1to4}, %ymm0, %ymm0
;
; AVX512CDVL-LABEL: testv4i64u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vplzcntq %ymm0, %ymm0
;
; AVX512CDVL-LABEL: testv8i32:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; AVX512CDVL-NEXT: vpandd %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubd {{.*}}(%rip){1to8}, %ymm0, %ymm0
;
; AVX512CDVL-LABEL: testv8i32u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpandd %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vplzcntd %ymm0, %ymm0
;
; AVX512CDVL-LABEL: testv16i16:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
;
; AVX512CDVL-LABEL: testv16i16u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
;
; AVX512CDVL-LABEL: testv32i8:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0
;
; AVX512CDVL-LABEL: testv32i8u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0