From 5b129466c2b8447d1e9f09bd705018ea4daeccc1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Dec 2016 13:16:08 +0000 Subject: [PATCH] [X86][SSE] Add support for extracting constant bit data from broadcasted constants git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288499 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 68 ++++++++++++------- .../X86/clear_upper_vector_element_bits.ll | 16 ++--- test/CodeGen/X86/vec_int_to_fp.ll | 12 ++-- test/CodeGen/X86/vec_uint_to_fp-fastmath.ll | 7 +- 4 files changed, 57 insertions(+), 46 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fb3a29c2d89..f64bb096a1c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5138,6 +5138,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, assert(UndefElts.empty() && "Expected an empty UndefElts vector"); assert(EltBits.empty() && "Expected an empty EltBits vector"); + Op = peekThroughBitcasts(Op); + EVT VT = Op.getValueType(); unsigned SizeInBits = VT.getSizeInBits(); assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); @@ -5170,35 +5172,35 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return true; }; - // Extract constant bits from constant pool scalar/vector. 
+ auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask, + APInt &Undefs) { + if (!Cst) + return false; + unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); + if (isa<UndefValue>(Cst)) { + Mask = APInt::getNullValue(SizeInBits); + Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits); + return true; + } + if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { + Mask = CInt->getValue().zextOrTrunc(SizeInBits); + Undefs = APInt::getNullValue(SizeInBits); + return true; + } + if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { + Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); + Undefs = APInt::getNullValue(SizeInBits); + return true; + } + return false; + }; + + // Extract constant bits from constant pool vector. if (auto *Cst = getTargetConstantFromNode(Op)) { Type *CstTy = Cst->getType(); if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits())) return false; - auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask, - APInt &Undefs) { - if (!Cst) - return false; - unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); - if (isa<UndefValue>(Cst)) { - Mask = APInt::getNullValue(SizeInBits); - Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits); - return true; - } - if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { - Mask = CInt->getValue().zextOrTrunc(SizeInBits); - Undefs = APInt::getNullValue(SizeInBits); - return true; - } - if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { - Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); - Undefs = APInt::getNullValue(SizeInBits); - return true; - } - return false; - }; - unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) { APInt Bits, Undefs; @@ -5211,9 +5213,27 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return SplitBitData(); } + // Extract constant bits from a broadcasted constant pool scalar.
+ if (Op.getOpcode() == X86ISD::VBROADCAST && + EltSizeInBits <= Op.getScalarValueSizeInBits()) { + if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) { + APInt Bits, Undefs; + if (ExtractConstantBits(Broadcast, Bits, Undefs)) { + unsigned NumBroadcastBits = Op.getScalarValueSizeInBits(); + unsigned NumBroadcastElts = SizeInBits / NumBroadcastBits; + for (unsigned i = 0; i != NumBroadcastElts; ++i) { + MaskBits |= Bits.shl(i * NumBroadcastBits); + UndefBits |= Undefs.shl(i * NumBroadcastBits); + } + return SplitBitData(); + } + } + } + return false; } +// TODO: Merge more of this with getTargetConstantBitsFromNode. static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, SmallVectorImpl<uint64_t> &RawMask) { diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll index 150061a6c19..9ae3483062c 100644 --- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -41,17 +41,11 @@ define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind { ; SSE-NEXT: andps {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: _clearupper4xi32a: -; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: _clearupper4xi32a: -; AVX2: # BB#0: -; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 -; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: _clearupper4xi32a: +; AVX: # BB#0: +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX-NEXT: retq %x0 = extractelement <4 x i32> %0, i32 0 %x1 = extractelement <4 x i32> %0, i32 1 %x2 = extractelement <4 x i32> %0, i32 2 diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index 95174542b33..75ec4a4c717 100644 ---
a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -590,8 +590,8 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> @@ -889,8 +889,8 @@ define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) { ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq @@ -3274,8 +3274,8 @@ define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) { ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq diff --git a/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll index 78b799c2162..a1b46b9324d 100644 --- a/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll +++ b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll @@ -26,9 +26,6 @@ ; AVX2: [[FPMASKCSTADDR:.LCPI[0-9_]+]]: ; AVX2-NEXT: .long
1199570944 # float 65536 -; AVX2: [[MASKCSTADDR:.LCPI[0-9_]+]]: -; AVX2-NEXT: .long 65535 # 0xffff - define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) { ; SSE2-LABEL: test_uitofp_v4i32_to_v4f32: ; SSE2: # BB#0: @@ -69,8 +66,8 @@ define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) { ; AVX2-NEXT: vcvtdq2ps %xmm1, %xmm1 ; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR]](%rip), %xmm2 ; AVX2-NEXT: vmulps %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpbroadcastd [[MASKCSTADDR]](%rip), %xmm2 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] ; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq -- 2.50.1