SDValue Src = N->getOperand(0);
SDValue Idx = N->getOperand(1);
+ EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getVectorElementType();
- EVT VT = N->getValueType(0);
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
// Don't attempt this for boolean mask vectors or unknown extraction indices.
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask))
return SDValue();
- // At the moment we can only narrow a shuffle mask to handle extractions
- // of smaller scalars.
- // TODO - investigate support for wider shuffle masks with known upper
- // undef/zero elements for implicit zero-extension.
- unsigned NumMaskElts = Mask.size();
- if ((SrcVT.getVectorNumElements() % NumMaskElts) != 0)
- return SDValue();
-
- int Scale = SrcVT.getVectorNumElements() / NumMaskElts;
- if (Scale != 1) {
- SmallVector<int, 16> ScaledMask;
- scaleShuffleMask(Scale, Mask, ScaledMask);
- Mask = ScaledMask;
+ // Attempt to narrow/widen the shuffle mask to the correct size.
+ if (Mask.size() != NumSrcElts) {
+ if ((NumSrcElts % Mask.size()) == 0) {
+ SmallVector<int, 16> ScaledMask;
+ int Scale = NumSrcElts / Mask.size();
+ scaleShuffleMask(Scale, Mask, ScaledMask);
+ Mask = std::move(ScaledMask);
+ } else if ((Mask.size() % NumSrcElts) == 0) {
+ SmallVector<int, 16> WidenedMask;
+ while (Mask.size() > NumSrcElts &&
+ canWidenShuffleElements(Mask, WidenedMask))
+ Mask = std::move(WidenedMask);
+ // TODO - investigate support for wider shuffle masks with known upper
+ // undef/zero elements for implicit zero-extension.
+ }
}
+ // Check if narrowing/widening failed.
+ if (Mask.size() != NumSrcElts)
+ return SDValue();
+
int SrcIdx = Mask[N->getConstantOperandVal(1)];
SDLoc dl(N);
; X32-LABEL: signbits_ashr_extract_sitofp:
; X32: # BB#0:
; X32-NEXT: pushl %eax
-; X32-NEXT: vpsrad $31, %xmm0, %xmm1
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X32-NEXT: vpextrd $1, %xmm0, %eax
+; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax