return Tmp;
// Simplify the expression using non-local knowledge.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
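
The hunk above drops the scalar-only guard, so the node-level SimplifyDemandedBits call now also fires for vector types. As a minimal sketch of the underlying idea (plain integers standing in for a single vector lane, not the SelectionDAG API): an AND against a constant mask only demands the bits set in that mask, so bits of the other operand outside the mask can change freely without affecting the result.

#include <cassert>
#include <cstdint>
int main() {
  const uint32_t AndMask = 6;            // root node: and(y, 6)
  uint32_t y = 0x12345678;
  uint32_t perturbed = y | ~AndMask;     // change only bits the AND throws away
  assert((y & AndMask) == (perturbed & AndMask));
  return 0;
}
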
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
// If we can't force bits, try to shrink the constant.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt Expanded = C->getAPIntValue() | (~NewMask);
// If we can expand it to have all bits set, do it.
if (Expanded.isAllOnesValue()) {
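
With splat constants now recognized via isConstOrConstSplat, the expand-to-all-ones path applies per vector lane. A hedged worked example of the arithmetic for the hidden_not tests below (plain uint32_t standing in for APInt): with NewMask = 6 and C = 15, Expanded = 15 | ~6 is all ones, so the xor constant may be widened to -1 and the xor becomes a plain not.

#include <cassert>
#include <cstdint>
int main() {
  const uint32_t NewMask = 6;              // bits demanded by the enclosing and
  const uint32_t C = 15;                   // original xor constant (one splat lane)
  const uint32_t Expanded = C | ~NewMask;  // 15 | ~6
  assert(Expanded == 0xFFFFFFFFu);         // all ones -> xor can become a plain not
  for (uint32_t x = 0; x <= 0xFFFF; ++x)
    assert(((x ^ C) & NewMask) == (~x & NewMask));
  return 0;
}
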
; CHECK-LABEL: hidden_not_v4i32:
; CHECK: @ BB#0:
; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov.i32 q8, #0xf
+; CHECK-NEXT: vmov.i32 q8, #0x6
; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vmov.i32 q10, #0x6
-; CHECK-NEXT: veor q8, q9, q8
-; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vbic q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: bx lr
define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
; CHECK-LABEL: hidden_not_v4i32:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 3, 15
-; CHECK-NEXT: vspltisw 4, 6
-; CHECK-NEXT: xxlxor 0, 34, 35
-; CHECK-NEXT: xxland 34, 0, 36
+; CHECK-NEXT: vspltisw 3, 6
+; CHECK-NEXT: xxlandc 34, 35, 34
; CHECK-NEXT: blr
%xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
%and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
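
Both targets now lower the pattern to a single and-with-complement. A small scalar sketch of the identity being relied on (the and_not helper is illustrative, standing in for ARM vbic and PowerPC xxlandc, both of which compute a & ~b): because the xor constant 15 covers every bit kept by the and mask 6, (x ^ 15) & 6 equals 6 & ~x.

#include <cassert>
#include <cstdint>
// Illustrative helper: vbic/xxlandc-style operation, a & ~b.
static uint32_t and_not(uint32_t a, uint32_t b) { return a & ~b; }
int main() {
  for (uint32_t x = 0; x <= 0xFFFF; ++x)
    assert(((x ^ 15u) & 6u) == and_not(6u, x));
  return 0;
}
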
define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
; AVX-LABEL: and_xor_splat1_v4i32:
; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: and_xor_splat1_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
; AVX-LABEL: and_xor_splat1_v4i64:
; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: and_xor_splat1_v4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
-; AVX512-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
%and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
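
The AVX and AVX512 variants collapse to a single vandnps. Sketch of the scalar identity, with an illustrative andn helper mirroring the x86 ANDN semantics ~a & b (the inverted operand is the first source): (x ^ 1) & 1 equals ~x & 1, so the xor/and pair folds into one andn against the splat-1 constant.

#include <cassert>
#include <cstdint>
// Illustrative helper: x86 (V)ANDN-style operation, ~a & b.
static uint64_t andn(uint64_t a, uint64_t b) { return ~a & b; }
int main() {
  for (uint64_t x = 0; x <= 0xFFFF; ++x)
    assert(((x ^ 1u) & 1u) == andn(x, 1u));
  return 0;
}
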
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL: ## BB#0:
-; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
;
; AVX512BW-LABEL: store_v2i1:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
;
; AVX512DQ-LABEL: store_v2i1:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL: ## BB#0:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
;
; AVX512BW-LABEL: store_v4i1:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
;
; AVX512DQ-LABEL: store_v4i1:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
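
In the mask-store tests only the low bit of each lane is demanded before the shift-and-test sequence, so the splat-1 xor constant widens to all ones; an all-ones register is then materialized with vpcmpeqd on itself instead of being loaded or broadcast from memory. A minimal sketch of why the widening is safe under that demanded mask:

#include <cassert>
#include <cstdint>
int main() {
  const uint64_t Demanded = 1;             // only the low bit survives the shift/test
  const uint64_t AllOnes = ~0ull;          // what vpcmpeqd reg,reg,reg produces per lane
  for (uint64_t x = 0; x <= 0xFF; ++x)
    assert(((x ^ 1u) & Demanded) == ((x ^ AllOnes) & Demanded));
  return 0;
}
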
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
; X64-SSE-NEXT: movdqa %xmm0, %xmm2
; X64-SSE-NEXT: pxor %xmm1, %xmm2
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551361,18446744073709551361]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
-; X64-SSE-NEXT: pxor %xmm3, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm5
-; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm5
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm6, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm2
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
+; X64-SSE-NEXT: movdqa %xmm3, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm5, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm2
; X64-SSE-NEXT: pandn %xmm0, %xmm2
-; X64-SSE-NEXT: pand %xmm3, %xmm4
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm0
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: pxor %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
+; X64-SSE-NEXT: por %xmm2, %xmm1
+; X64-SSE-NEXT: movdqa %xmm0, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE-NEXT: pandn %xmm4, %xmm0
+; X64-SSE-NEXT: pandn %xmm3, %xmm0
; X64-SSE-NEXT: pand %xmm2, %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %rax
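
Two constant folds account for the churn in this hunk, assuming the constant-pool comments list 64-bit lanes low element first: the runtime pxor of the sign-flip constant with the old compare constant is now a single folded constant, and the later pxor with [255,255] becomes por because the other operand's low byte is zero, making xor and or equivalent. A hedged check of both identities with plain integers:

#include <cassert>
#include <cstdint>
int main() {
  const uint64_t SignFlip = 0x0000000080000000ull; // old xmm1 lane: [2147483648,0]
  const uint64_t OldCmp   = 0xFFFFFFFFFFFFFF01ull; // 18446744073709551361
  const uint64_t NewCmp   = 0xFFFFFFFF7FFFFF01ull; // 18446744071562067713
  assert((SignFlip ^ OldCmp) == NewCmp);           // the runtime pxor is folded away

  const uint64_t Low8 = 0xFFull;                   // the [255,255] splat
  assert((SignFlip & Low8) == 0);                  // operands share no set bits...
  assert((SignFlip ^ Low8) == (SignFlip | Low8));  // ...so the pxor can become por
  return 0;
}
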