{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
{ X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
{ X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
{ X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
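// AVX 128-bit foldable instructions (VEX memory operands need no alignment, hence flag 0)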
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
{ X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
{ X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
+ { X86::VCVTPD2PSrr, X86::VCVTPD2PSXrm, 0 },
{ X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
{ X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
// AVX 256-bit foldable instructions
{ X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
{ X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
+ { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
{ X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
{ X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
{ X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
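
Reviewer note (not part of the patch): each row maps a register-form opcode to its load-folding form plus an alignment flag; folding is rejected when an SSE entry demands 16-byte alignment the memory operand cannot guarantee. A minimal, self-contained sketch of that lookup, using hypothetical names (MemoryFoldEntry, foldLoad) rather than the real X86InstrInfo machinery:

#include <cstdint>
#include <optional>
#include <unordered_map>

// Hypothetical stand-ins for the real LLVM flag encodings.
enum : uint16_t { TB_ALIGN_NONE = 0, TB_ALIGN_16 = 16 };

struct MemoryFoldEntry {
  unsigned MemOpc; // load-folding form, e.g. VCVTPD2PSYrm
  uint16_t Flags;  // required memory-operand alignment
};

// Keyed by the register-form opcode, e.g. VCVTPD2PSYrr.
using FoldTable = std::unordered_map<unsigned, MemoryFoldEntry>;

// Returns the memory form if folding is legal for the known alignment.
std::optional<unsigned> foldLoad(const FoldTable &Tbl, unsigned RegOpc,
                                 unsigned KnownAlign) {
  auto It = Tbl.find(RegOpc);
  if (It == Tbl.end())
    return std::nullopt; // no foldable form for this opcode
  if (It->second.Flags == TB_ALIGN_16 && KnownAlign < 16)
    return std::nullopt; // SSE form requires 16-byte-aligned memory
  return It->second.MemOpc; // AVX entries (flag 0) fold unconditionally
}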
ret void
}

+define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
+ ;CHECK-LABEL: stack_fold_cvtpd2ps
+ ;CHECK: vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+
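+ ; The oversized <128 x double> vectors deliberately exceed available registers,
+ ; forcing spills; the reload feeding vcvtpd2psy should then be folded.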
+ %1 = load <128 x double>* %a
+ %2 = load <128 x double>* %b
+ %3 = fadd <128 x double> %1, %2
+ %4 = fsub <128 x double> %1, %2
+ %5 = fptrunc <128 x double> %3 to <128 x float>
+ %6 = fptrunc <128 x double> %4 to <128 x float>
+ %7 = fadd <128 x float> %5, %6
+ store <128 x float> %7, <128 x float>* %c
+ ret void
+}
+
define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) #0 {
;CHECK-LABEL: stack_fold_cvttpd2dq
;CHECK: vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload