/// If N is a BUILD_VECTOR node whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatValue.
- bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
+ /// This sets \p SplatValue to the smallest possible splat unless AllowShrink
+ /// is set to false.
+ bool isConstantSplatVector(const SDNode *N, APInt &SplatValue,
+ bool AllowShrink = true);
/// Return true if the specified node is a BUILD_VECTOR where all of the
/// elements are ~0 or undef.
// ISD Namespace
//===----------------------------------------------------------------------===//
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
+ bool AllowShrink) {
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
APInt SplatUndef;
unsigned SplatBitSize;
bool HasUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize;
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
+ MinSplatBits) &&
+ EltSize >= SplatBitSize;
}
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
// In SetLT case, The second operand of the comparison can be either 1 or 0.
APInt SplatVal;
if ((CC == ISD::SETLT) &&
- !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
- SplatVal == 1) ||
+ !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal,
+ /*AllowShrink*/false) &&
+ SplatVal.isOneValue()) ||
(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
return false;
return SDValue();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
+ if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal,
+ /*AllowShrink*/false) ||
!SplatVal.isMask())
return SDValue();
"Unexpected types for truncate operation");
APInt C;
- if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
+ if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C,
+ /*AllowShrink*/false)) {
// C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
// the element size of the destination type.
return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) :
SDNode *N1 = N->getOperand(1).getNode();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue())
+ if (!ISD::isConstantSplatVector(N1, SplatVal, /*AllowShrink*/false) ||
+ !SplatVal.isOneValue())
return SDValue();
SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512vl,avx512bw | FileCheck %s
+
+define <16 x i16> @foo(<16 x i32> %i) {
+; CHECK-LABEL: foo:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: retq
+ %x3 = icmp ult <16 x i32> %i, <i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+ %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+ %x6 = trunc <16 x i32> %x5 to <16 x i16>
+ ret <16 x i16> %x6
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .zero 16,1
+
+define <4 x i32> @f(<4 x i32> %a) {
+; CHECK-LABEL: f:
+; CHECK: # BB#0:
+; CHECK-NEXT: paddd .LCPI0_0(%rip), %xmm0
+; CHECK-NEXT: retq
+ %v = add nuw nsw <4 x i32> %a, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+ ret <4 x i32> %v
+}