From d4d582af7ca2ad48e08e0fa1c305304e065a6fa6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 10 Apr 2019 16:24:47 +0000
Subject: [PATCH] [X86][AVX] getTargetConstantBitsFromNode - extract bits from
 X86ISD::SUBV_BROADCAST

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358096 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp    |  13 +++
 test/CodeGen/X86/combine-bitselect.ll | 118 ++++++++++----------
 2 files changed, 60 insertions(+), 71 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 671993199f3..2f5db87090e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5920,6 +5920,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
     }
   }
 
+  // Extract constant bits from a subvector broadcast.
+  if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) {
+    SmallVector<APInt, 16> SubEltBits;
+    if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+                                      UndefElts, SubEltBits, AllowWholeUndefs,
+                                      AllowPartialUndefs)) {
+      UndefElts = APInt::getSplat(NumElts, UndefElts);
+      while (EltBits.size() < NumElts)
+        EltBits.append(SubEltBits.begin(), SubEltBits.end());
+      return true;
+    }
+  }
+
   // Extract a rematerialized scalar constant insertion.
   if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
       Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
diff --git a/test/CodeGen/X86/combine-bitselect.ll b/test/CodeGen/X86/combine-bitselect.ll
index 9a911ddffef..8cb6a4dca09 100644
--- a/test/CodeGen/X86/combine-bitselect.ll
+++ b/test/CodeGen/X86/combine-bitselect.ll
@@ -370,44 +370,36 @@ define <8 x i64> @bitselect_v8i64_rm(<8 x i64>, <8 x i64>* nocapture readonly) {
 ;
 ; XOP-LABEL: bitselect_v8i64_rm:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [8589934593,3,8589934593,3]
-; XOP-NEXT:    # ymm2 = mem[0,1,0,1]
-; XOP-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; XOP-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
-; XOP-NEXT:    # ymm2 = mem[0,1,0,1]
-; XOP-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
-; XOP-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; XOP-NEXT:    vandps (%rdi), %ymm2, %ymm2
-; XOP-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; XOP-NEXT:    vmovdqa (%rdi), %ymm2
+; XOP-NEXT:    vmovdqa 32(%rdi), %ymm3
+; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
+; XOP-NEXT:    # ymm4 = mem[0,1,0,1]
+; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
+; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
 ; XOP-NEXT:    retq
 ;
 ; AVX1-LABEL: bitselect_v8i64_rm:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [8589934593,3,8589934593,3]
-; AVX1-NEXT:    # ymm2 = mem[0,1,0,1]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
 ; AVX1-NEXT:    # ymm2 = mem[0,1,0,1]
 ; AVX1-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
+; AVX1-NEXT:    vandps (%rdi), %ymm2, %ymm4
+; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT:    vorps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
 ; AVX1-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; AVX1-NEXT:    vandps (%rdi), %ymm2, %ymm2
-; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: bitselect_v8i64_rm:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [8589934593,3,8589934593,3]
-; AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
-; AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
 ; AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
 ; AVX2-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
+; AVX2-NEXT:    vandps (%rdi), %ymm2, %ymm4
+; AVX2-NEXT:    vandnps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vorps %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vandnps %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; AVX2-NEXT:    vandps (%rdi), %ymm2, %ymm2
-; AVX2-NEXT:    vorps %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: bitselect_v8i64_rm:
@@ -455,16 +447,12 @@ define <8 x i64> @bitselect_v8i64_mr(<8 x i64>* nocapture readonly, <8 x i64>) {
 ;
 ; XOP-LABEL: bitselect_v8i64_mr:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296]
-; XOP-NEXT:    # ymm2 = mem[0,1,0,1]
-; XOP-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
-; XOP-NEXT:    vandps (%rdi), %ymm2, %ymm2
-; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [18446744060824649725,18446744069414584319,18446744060824649725,18446744069414584319]
+; XOP-NEXT:    vmovdqa (%rdi), %ymm2
+; XOP-NEXT:    vmovdqa 32(%rdi), %ymm3
+; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [12884901890,4294967296,12884901890,4294967296]
 ; XOP-NEXT:    # ymm4 = mem[0,1,0,1]
-; XOP-NEXT:    vandps %ymm4, %ymm1, %ymm1
-; XOP-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; XOP-NEXT:    vandps %ymm4, %ymm0, %ymm0
-; XOP-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
+; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
 ; XOP-NEXT:    retq
 ;
 ; AVX1-LABEL: bitselect_v8i64_mr:
@@ -472,13 +460,11 @@ define <8 x i64> @bitselect_v8i64_mr(<8 x i64>* nocapture readonly, <8 x i64>) {
 ; AVX1-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296]
 ; AVX1-NEXT:    # ymm2 = mem[0,1,0,1]
 ; AVX1-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
-; AVX1-NEXT:    vandps (%rdi), %ymm2, %ymm2
-; AVX1-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [18446744060824649725,18446744069414584319,18446744060824649725,18446744069414584319]
-; AVX1-NEXT:    # ymm4 = mem[0,1,0,1]
-; AVX1-NEXT:    vandps %ymm4, %ymm1, %ymm1
+; AVX1-NEXT:    vandps (%rdi), %ymm2, %ymm4
+; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT:    vorps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
 ; AVX1-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
-; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: bitselect_v8i64_mr:
@@ -486,13 +472,11 @@ define <8 x i64> @bitselect_v8i64_mr(<8 x i64>* nocapture readonly, <8 x i64>) {
 ; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296]
 ; AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
 ; AVX2-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
-; AVX2-NEXT:    vandps (%rdi), %ymm2, %ymm2
-; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [18446744060824649725,18446744069414584319,18446744060824649725,18446744069414584319]
-; AVX2-NEXT:    # ymm4 = mem[0,1,0,1]
-; AVX2-NEXT:    vandps %ymm4, %ymm1, %ymm1
+; AVX2-NEXT:    vandps (%rdi), %ymm2, %ymm4
+; AVX2-NEXT:    vandnps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vorps %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vandnps %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; AVX2-NEXT:    vandps %ymm4, %ymm0, %ymm0
-; AVX2-NEXT:    vorps %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: bitselect_v8i64_mr:
@@ -536,44 +520,36 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n
 ;
 ; XOP-LABEL: bitselect_v8i64_mm:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [3,8589934593,3,8589934593]
-; XOP-NEXT:    # ymm0 = mem[0,1,0,1]
-; XOP-NEXT:    vandps 32(%rdi), %ymm0, %ymm1
-; XOP-NEXT:    vandps (%rdi), %ymm0, %ymm0
+; XOP-NEXT:    vmovdqa (%rsi), %ymm0
+; XOP-NEXT:    vmovdqa 32(%rsi), %ymm1
 ; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
 ; XOP-NEXT:    # ymm2 = mem[0,1,0,1]
-; XOP-NEXT:    vandps 32(%rsi), %ymm2, %ymm3
-; XOP-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; XOP-NEXT:    vandps (%rsi), %ymm2, %ymm2
-; XOP-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; XOP-NEXT:    vpcmov %ymm2, (%rdi), %ymm0, %ymm0
+; XOP-NEXT:    vpcmov %ymm2, 32(%rdi), %ymm1, %ymm1
 ; XOP-NEXT:    retq
 ;
 ; AVX1-LABEL: bitselect_v8i64_mm:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [3,8589934593,3,8589934593]
-; AVX1-NEXT:    # ymm0 = mem[0,1,0,1]
-; AVX1-NEXT:    vandps 32(%rdi), %ymm0, %ymm1
-; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
-; AVX1-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
-; AVX1-NEXT:    # ymm2 = mem[0,1,0,1]
-; AVX1-NEXT:    vandps 32(%rsi), %ymm2, %ymm3
-; AVX1-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; AVX1-NEXT:    vandps (%rsi), %ymm2, %ymm2
-; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
+; AVX1-NEXT:    # ymm1 = mem[0,1,0,1]
+; AVX1-NEXT:    vandps 32(%rsi), %ymm1, %ymm2
+; AVX1-NEXT:    vandps (%rsi), %ymm1, %ymm0
+; AVX1-NEXT:    vandnps (%rdi), %ymm1, %ymm3
+; AVX1-NEXT:    vorps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vandnps 32(%rdi), %ymm1, %ymm1
+; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: bitselect_v8i64_mm:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [3,8589934593,3,8589934593]
-; AVX2-NEXT:    # ymm0 = mem[0,1,0,1]
-; AVX2-NEXT:    vandps 32(%rdi), %ymm0, %ymm1
-; AVX2-NEXT:    vandps (%rdi), %ymm0, %ymm0
-; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
-; AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
-; AVX2-NEXT:    vandps 32(%rsi), %ymm2, %ymm3
-; AVX2-NEXT:    vorps %ymm1, %ymm3, %ymm1
-; AVX2-NEXT:    vandps (%rsi), %ymm2, %ymm2
-; AVX2-NEXT:    vorps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
+; AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
+; AVX2-NEXT:    vandps 32(%rsi), %ymm1, %ymm2
+; AVX2-NEXT:    vandps (%rsi), %ymm1, %ymm0
+; AVX2-NEXT:    vandnps (%rdi), %ymm1, %ymm3
+; AVX2-NEXT:    vorps %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vandnps 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT:    vorps %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: bitselect_v8i64_mm:
-- 
2.40.0
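
Note (not part of the patch): the sketch below is standalone C++ with no LLVM types, and every name in it is illustrative rather than taken from the tree. It models what the new X86ISD::SUBV_BROADCAST case in getTargetConstantBitsFromNode does: the constant bits recovered from the 128-bit subvector operand are appended repeatedly until they cover the wider vector, and the subvector's per-element undef mask is splatted the same way. Once the combiner can see the broadcasted mask as an ordinary constant, the and/or chains in the tests fold into bit-selects (vpcmov on XOP, vandnps/vorps on AVX1/AVX2) and the redundant masking of the register operands disappears.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative stand-ins for the per-element constant bits and undef mask
// that getTargetConstantBitsFromNode reports.
struct ConstantBits {
  std::vector<uint64_t> EltBits; // one 64-bit constant per vector element
  uint64_t UndefMask = 0;        // bit i set => element i is undef
};

// Repeat a subvector's constants/undefs to describe a subvector broadcast
// into a vector of NumElts elements: the constant bits are appended until the
// wider vector is covered, and the undef mask is splatted alongside them.
static ConstantBits broadcastSubvector(const ConstantBits &Sub,
                                       unsigned NumElts) {
  const unsigned SubElts = static_cast<unsigned>(Sub.EltBits.size());
  assert(SubElts != 0 && NumElts % SubElts == 0 &&
         "expected a whole number of subvectors");
  ConstantBits Out;
  // Append the subvector constants until the wider vector is filled.
  while (Out.EltBits.size() < NumElts)
    Out.EltBits.insert(Out.EltBits.end(), Sub.EltBits.begin(),
                       Sub.EltBits.end());
  // Splat the per-element undef mask across each repeated subvector.
  for (unsigned I = 0; I != NumElts / SubElts; ++I)
    Out.UndefMask |= Sub.UndefMask << (I * SubElts);
  return Out;
}

int main() {
  // A v2i64 constant <8589934593, 3> broadcast to v4i64, matching the mask
  // used by bitselect_v8i64_rm in the test diff.
  ConstantBits Sub;
  Sub.EltBits = {8589934593ULL, 3ULL};
  ConstantBits Wide = broadcastSubvector(Sub, 4);
  for (uint64_t V : Wide.EltBits)
    std::printf("%llu\n", static_cast<unsigned long long>(V));
  return 0;
}

Compiled and run, the sketch prints 8589934593, 3, 8589934593, 3 - the same repeated v2i64 constant that the old codegen had to rematerialize with an extra vbroadcastf128/vandps pair before the and/or could be recognized as a bit-select.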