setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
- if (IsSigned && N->getValueType(0) == MVT::v2i32) {
+ if (N->getValueType(0) == MVT::v2i32) {
+ assert((IsSigned || Subtarget.hasAVX512()) &&
+ "Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue Src = N->getOperand(0);
if (Src.getValueType() == MVT::v2f64) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
- SDValue Res = DAG.getNode(X86ISD::CVTTPD2DQ, dl, MVT::v4i32, Src);
+ SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTPD2DQ
+ : X86ISD::CVTTPD2UDQ,
+ dl, MVT::v4i32, Src);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
- Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Res);
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
+ : ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::CVTTPD2DQ: return "X86ISD::CVTTPD2DQ";
+ case X86ISD::CVTTPD2UDQ: return "X86ISD::CVTTPD2UDQ";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
X86cvttp2uiRnd>, PS,
EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, fp_to_uint,
- X86cvttp2uiRnd>, PS, VEX_W,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
+ X86cvttpd2udq, X86cvttp2uiRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
+def : Pat<(v4i32 (X86cvttpd2udq (v2f64 VR128X:$src))),
+ (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src, sub_xmm)))), sub_xmm)>;
+
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))),
(VCVTTPD2DQZ128rr VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2udq (v2f64 VR128X:$src)))))))),
+ (VCVTTPD2UDQZ128rr VR128:$src)>;
}
let Predicates = [HasAVX512] in {
;
; AVX512F-LABEL: fptoui_2f64_to_4i32:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f64_to_4i32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512VL-NEXT: vmovq %rax, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
; AVX512VLDQ: # BB#0:
-; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;
; AVX512F-LABEL: fptoui_2f64_to_2i32:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
+; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f64_to_2i32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512VL-NEXT: vmovq %rax, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
-; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
; AVX512VLDQ: # BB#0:
-; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: retq
;
-; AVX512-LABEL: fptoui_2f32_to_2i32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vcvttss2usi %xmm0, %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX512-NEXT: vcvttss2usi %xmm0, %rax
-; AVX512-NEXT: vmovq %rax, %xmm0
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-NEXT: retq
+; AVX512F-LABEL: fptoui_2f32_to_2i32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
+; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptoui_2f32_to_2i32:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
+; AVX512VLDQ: # BB#0:
+; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VLDQ-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i32>
ret <2 x i32> %cvt
}