}
return ComputeNumSignBits(Src, Depth + 1);
}
- case ISD::CONCAT_VECTORS:
+ case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If the insertion index is a known constant, demand only the relevant
+ // elements of the subvector and of the source vector it is inserted into; otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { // constant index, at most NumElts - NumSubElts
+ Tmp = std::numeric_limits<unsigned>::max(); // sentinel: no operand queried yet
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); // NumSubElts demanded bits starting at Idx
+ if (!!DemandedSubElts) { // only query Sub when at least one of these bits is set
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); // bits [Idx, Idx + NumSubElts) set
+ APInt DemandedSrcElts = DemandedElts & ~SubMask; // demanded bits outside the inserted range
+ if (!!DemandedSrcElts) { // only query Src when at least one of these bits is set
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2); // result is the smaller of the queried counts
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
+ // Index is not a constant (or fails the range check above): query both operands without a demanded-elements mask.
+ Tmp = ComputeNumSignBits(Sub, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ Tmp2 = ComputeNumSignBits(Src, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2); // result is the smaller of the two counts
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ }
// If we are looking at the loaded value of the SDNode.
if (Op.getResNo() == 0) {
; X32-LABEL: cross_bb_signbits_insert_subvec:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X32-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
-; X32-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; X32-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X32-NEXT: vpsllw $7, %xmm3, %xmm3
-; X32-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X32-NEXT: vpand %xmm4, %xmm3, %xmm3
-; X32-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3
-; X32-NEXT: vpsllw $7, %xmm0, %xmm0
-; X32-NEXT: vpand %xmm4, %xmm0, %xmm0
-; X32-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
-; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X32-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X32-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; X32-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
+; X32-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-NEXT: vandnps %ymm1, %ymm0, %ymm1
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
;
; X64-LABEL: cross_bb_signbits_insert_subvec:
; X64: # %bb.0:
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X64-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
-; X64-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-NEXT: vpsllw $7, %xmm3, %xmm3
-; X64-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-NEXT: vpand %xmm4, %xmm3, %xmm3
-; X64-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3
-; X64-NEXT: vpsllw $7, %xmm0, %xmm0
-; X64-NEXT: vpand %xmm4, %xmm0, %xmm0
-; X64-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
-; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; X64-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
+; X64-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT: vandnps %ymm1, %ymm0, %ymm1
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0