getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
- Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(0x80000000, DL, MVT::i32));
+ Adjust = DAG.getSelect(DL, MVT::i64, Cmp,
+ DAG.getConstant(0, DL, MVT::i64),
+ DAG.getConstant(APInt::getSignMask(64),
+ DL, MVT::i64));
SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
                                          *DAG.getContext(), TheVT),
                   Value, ThreshVal, ISD::SETLT);
Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);

SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
                                       DAG.getVTList(MVT::Other),
                                       Ops, DstTy, MMO);
- if (!UnsignedFixup)
- return DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot,
- MachinePointerInfo());
-
- // Insert the FIST, load its result as two i32's,
- // and XOR the high i32 with Adjust.
-
- SDValue Low32 =
- DAG.getLoad(MVT::i32, DL, FIST, StackSlot, MachinePointerInfo());
- SDValue HighAddr = DAG.getMemBasePlusOffset(StackSlot, 4, DL);
-
- SDValue High32 =
- DAG.getLoad(MVT::i32, DL, FIST, HighAddr, MachinePointerInfo());
- High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
+ SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot,
+ MachinePointerInfo());
- if (Subtarget.is64Bit()) {
- // Join High32 and Low32 into a 64-bit result.
- // (High32 << 32) | Low32
- Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
- High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
- High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
- DAG.getConstant(32, DL, MVT::i8));
- return DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
- }
+ // If we need an unsigned fixup, XOR the result with adjust.
+ if (UnsignedFixup)
+ Res = DAG.getNode(ISD::XOR, DL, MVT::i64, Res, Adjust);
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { Low32, High32 });
+ return Res;
}
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
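
Taken together, the new lowering materializes the whole unsigned result as one i64 load XORed with Adjust, instead of loading two i32 halves and recombining them with a shift-and-or (64-bit) or BUILD_PAIR (32-bit). As a reference for what that fixup computes, here is a minimal scalar sketch in plain C++, independent of the SelectionDAG code above; fptoui64_sketch is an illustrative name, not an LLVM function:

  #include <cstdint>

  // Sketch of the unsigned fixup the lowering implements: values at or
  // above 2^63 are shifted into signed range before the conversion, and
  // the sign bit is restored afterwards with a single 64-bit XOR.
  uint64_t fptoui64_sketch(float Value) {
    const float Thresh = 0x1p63f;     // 2^63, the i64 sign-bit boundary
    uint64_t Adjust = 0;              // select(Value < Thresh, 0, SignMask)
    if (!(Value < Thresh)) {
      Value -= Thresh;                // the ISD::FSUB against ThreshVal
      Adjust = UINT64_C(1) << 63;     // APInt::getSignMask(64)
    }
    // fisttpll performs this signed conversion through the stack slot;
    // the lowering then reads it back with a single i64 load.
    int64_t Signed = static_cast<int64_t>(Value);
    return static_cast<uint64_t>(Signed) ^ Adjust;  // the one ISD::XOR
  }

The regenerated FileCheck lines below (the test file itself is not named in this excerpt) show the effect on an i686 AVX test that appears to convert <2 x float> to <2 x i64>: the vcmpltss/vblendvps select over the adjusted input becomes an explicit compare and branch, and each setae/shll $31/xorl sequence now patches the high word of its own fisttpll result.
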
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $68, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 72
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
-; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
-; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
+; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: jb LBB11_2
+; CHECK-NEXT: ## %bb.1:
+; CHECK-NEXT: vsubss %xmm1, %xmm2, %xmm2
+; CHECK-NEXT: LBB11_2:
+; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fisttpll (%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: vucomiss %xmm2, %xmm1
; CHECK-NEXT: setae %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: shll $31, %eax
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: vucomiss %xmm2, %xmm0
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: jb LBB11_4
+; CHECK-NEXT: ## %bb.3:
+; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: LBB11_4:
+; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT: setae %cl
+; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: shll $31, %ecx
; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero