From 3d6f38ae74c99b6e15bf956823a2af32e6f112b5 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 2 Sep 2019 16:49:29 +0000 Subject: [PATCH] [SystemZ] Support constrained fpto[su]i intrinsics Now that constrained fpto[su]i intrinsics are available, add codegen support to the SystemZ backend. In addition to pure back-end changes, I've also needed to add the strict_fp_to_[su]int and any_fp_to_[su]int pattern fragments in the obvious way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370674 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetSelectionDAG.td | 10 +++ lib/Target/SystemZ/SystemZISelLowering.cpp | 16 ++++ lib/Target/SystemZ/SystemZInstrFP.td | 24 +++--- lib/Target/SystemZ/SystemZInstrVector.td | 8 +- test/CodeGen/SystemZ/fp-strict-conv-09.ll | 40 +++++++++ test/CodeGen/SystemZ/fp-strict-conv-10.ll | 95 ++++++++++++++++++++++ test/CodeGen/SystemZ/fp-strict-conv-11.ll | 40 +++++++++ test/CodeGen/SystemZ/fp-strict-conv-12.ll | 94 +++++++++++++++++++++ test/CodeGen/SystemZ/fp-strict-conv-14.ll | 77 ++++++++++++++++++ test/CodeGen/SystemZ/fp-strict-conv-16.ll | 63 ++++++++++++++ test/CodeGen/SystemZ/vec-strict-conv-01.ll | 67 +++++++++++++++ test/CodeGen/SystemZ/vec-strict-conv-03.ll | 29 +++++++ 12 files changed, 547 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/SystemZ/fp-strict-conv-09.ll create mode 100644 test/CodeGen/SystemZ/fp-strict-conv-10.ll create mode 100644 test/CodeGen/SystemZ/fp-strict-conv-11.ll create mode 100644 test/CodeGen/SystemZ/fp-strict-conv-12.ll create mode 100644 test/CodeGen/SystemZ/fp-strict-conv-14.ll create mode 100644 test/CodeGen/SystemZ/fp-strict-conv-16.ll create mode 100644 test/CodeGen/SystemZ/vec-strict-conv-01.ll create mode 100644 test/CodeGen/SystemZ/vec-strict-conv-03.ll diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index e474287ca0e..18f61127a7a 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ 
b/include/llvm/Target/TargetSelectionDAG.td @@ -518,6 +518,10 @@ def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND", SDTFPRoundOp, [SDNPHasChain]>; def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND", SDTFPExtendOp, [SDNPHasChain]>; +def strict_fp_to_sint : SDNode<"ISD::STRICT_FP_TO_SINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_fp_to_uint : SDNode<"ISD::STRICT_FP_TO_UINT", + SDTFPToIntOp, [SDNPHasChain]>; def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; @@ -1353,6 +1357,12 @@ def any_extloadf32 : PatFrags<(ops node:$ptr), def any_extloadf64 : PatFrags<(ops node:$ptr), [(strict_extloadf64 node:$ptr), (extloadf64 node:$ptr)]>; +def any_fp_to_sint : PatFrags<(ops node:$src), + [(strict_fp_to_sint node:$src), + (fp_to_sint node:$src)]>; +def any_fp_to_uint : PatFrags<(ops node:$src), + [(strict_fp_to_uint node:$src), + (fp_to_uint node:$src)]>; multiclass binary_atomic_op_ord { def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 50a03d538d9..7605a7e8034 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -206,6 +206,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // the default expansion. if (!Subtarget.hasFPExtension()) setOperationAction(ISD::FP_TO_UINT, VT, Expand); + + // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all + // default to Expand, so need to be modified to Legal where appropriate. 
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); + if (Subtarget.hasFPExtension()) + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); } } @@ -381,6 +387,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); + + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); } if (Subtarget.hasVectorEnhancements2()) { @@ -392,6 +403,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); + + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); } // Handle floating-point types. diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 19c7ec58ed3..0a4caabe365 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -276,13 +276,13 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { } // fp_to_sint always rounds towards zero, which is modifier value 5. 
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; -def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; -def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; +def : Pat<(i32 (any_fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (any_fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (any_fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; -def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; -def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; -def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; +def : Pat<(i64 (any_fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (any_fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (any_fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; // The FP extension feature provides versions of the above that allow // also specifying the inexact-exception suppression flag. @@ -309,13 +309,13 @@ let Predicates = [FeatureFPExtension] in { def CLGXBR : TernaryRRFe<"clgxbr", 0xB3AE, GR64, FP128>; } - def : Pat<(i32 (fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>; - def : Pat<(i32 (fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>; - def : Pat<(i32 (fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>; } diff --git a/lib/Target/SystemZ/SystemZInstrVector.td 
b/lib/Target/SystemZ/SystemZInstrVector.td index 261727f8905..73c5028f5e3 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -1069,7 +1069,7 @@ let Predicates = [FeatureVector] in { def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>; + def : FPConversion<VCGDB, any_fp_to_sint, v128g, v128db, 0, 5>; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1078,7 +1078,7 @@ let Predicates = [FeatureVector] in { def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>; + def : FPConversion<VCFEB, any_fp_to_sint, v128f, v128sb, 0, 5>; } // Convert to logical. @@ -1088,7 +1088,7 @@ let Predicates = [FeatureVector] in { def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>; + def : FPConversion<VCLGDB, any_fp_to_uint, v128g, v128db, 0, 5>; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1097,7 +1097,7 @@ let Predicates = [FeatureVector] in { def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>; + def : FPConversion<VCLFEB, any_fp_to_uint, v128f, v128sb, 0, 5>; } // Divide. diff --git a/test/CodeGen/SystemZ/fp-strict-conv-09.ll b/test/CodeGen/SystemZ/fp-strict-conv-09.ll new file mode 100644 index 00000000000..b1a11e66e2a --- /dev/null +++ b/test/CodeGen/SystemZ/fp-strict-conv-09.ll @@ -0,0 +1,40 @@ +; Test strict conversion of floating-point values to signed i32s. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) + +; Test f32->i32. +define i32 @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: cfebr %r2, 5, %f0 +; CHECK: br %r14 + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test f64->i32. +define i32 @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: cfdbr %r2, 5, %f0 +; CHECK: br %r14 + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test f128->i32. +define i32 @f3(fp128 *%src) { +; CHECK-LABEL: f3: +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: cfxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i32 %conv +} diff --git a/test/CodeGen/SystemZ/fp-strict-conv-10.ll b/test/CodeGen/SystemZ/fp-strict-conv-10.ll new file mode 100644 index 00000000000..d69e64d7c96 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-strict-conv-10.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Test strict conversion of floating-point values to unsigned i32s (z10 only). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; z10 doesn't have native support for unsigned fp-to-i32 conversions; +; they were added in z196 as the Convert to Logical family of instructions. +; Promoting to i64 doesn't generate an inexact condition for values that are +; outside the i32 range but in the i64 range, so use the default expansion. +; Note that the strict expansion sequence must be used. 
+ +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) + +; Test f32->i32. +define i32 @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI0_0 +; CHECK-NEXT: le %f2, 0(%r1) +; CHECK-NEXT: ler %f1, %f0 +; CHECK-NEXT: sebr %f1, %f2 +; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ler %f0, %f1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: cfebr %r2, 5, %f0 +; CHECK-NEXT: xr %r2, %r0 +; CHECK-NEXT: br %r14 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test f64->i32. +define i32 @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: ldeb %f2, 0(%r1) +; CHECK-NEXT: ldr %f1, %f0 +; CHECK-NEXT: sdbr %f1, %f2 +; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: jl .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ldr %f0, %f1 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: cfdbr %r2, 5, %f0 +; CHECK-NEXT: xr %r2, %r0 +; CHECK-NEXT: br %r14 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test f128->i32. 
+define i32 @f3(fp128 *%src) { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: ld %f0, 0(%r2) +; CHECK-NEXT: ld %f2, 8(%r2) +; CHECK-NEXT: larl %r1, .LCPI2_0 +; CHECK-NEXT: lxeb %f4, 0(%r1) +; CHECK-NEXT: lxr %f1, %f0 +; CHECK-NEXT: sxbr %f1, %f4 +; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: jl .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lxr %f0, %f1 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: cfxbr %r2, 5, %f0 +; CHECK-NEXT: xr %r2, %r0 +; CHECK-NEXT: br %r14 + %f = load fp128, fp128 *%src + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i32 %conv +} diff --git a/test/CodeGen/SystemZ/fp-strict-conv-11.ll b/test/CodeGen/SystemZ/fp-strict-conv-11.ll new file mode 100644 index 00000000000..bc3e9b9b8a5 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-strict-conv-11.ll @@ -0,0 +1,40 @@ +; Test strict conversion of floating-point values to signed i64s. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) + +; Test f32->i64. +define i64 @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: cgebr %r2, 5, %f0 +; CHECK: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %f, + metadata !"fpexcept.strict") + ret i64 %conv +} + +; Test f64->i64. +define i64 @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: cgdbr %r2, 5, %f0 +; CHECK: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %f, + metadata !"fpexcept.strict") + ret i64 %conv +} + +; Test f128->i64. 
+define i64 @f3(fp128 *%src) { +; CHECK-LABEL: f3: +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: cgxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i64 %conv +} diff --git a/test/CodeGen/SystemZ/fp-strict-conv-12.ll b/test/CodeGen/SystemZ/fp-strict-conv-12.ll new file mode 100644 index 00000000000..2319c629daf --- /dev/null +++ b/test/CodeGen/SystemZ/fp-strict-conv-12.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Test strict conversion of floating-point values to unsigned i64s (z10 only). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; z10 doesn't have native support for unsigned fp-to-i64 conversions; +; they were added in z196 as the Convert to Logical family of instructions. +; Convert via signed i64s instead. +; Note that the strict expansion sequence must be used. + +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) + +; Test f32->i64. +define i64 @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI0_0 +; CHECK-NEXT: le %f2, 0(%r1) +; CHECK-NEXT: ler %f1, %f0 +; CHECK-NEXT: sebr %f1, %f2 +; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ler %f0, %f1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: cgebr %r2, 5, %f0 +; CHECK-NEXT: xgr %r2, %r0 +; CHECK-NEXT: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %f, + metadata !"fpexcept.strict") + ret i64 %conv +} + +; Test f64->i64. 
+define i64 @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: ldeb %f2, 0(%r1) +; CHECK-NEXT: ldr %f1, %f0 +; CHECK-NEXT: sdbr %f1, %f2 +; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: jl .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ldr %f0, %f1 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: cgdbr %r2, 5, %f0 +; CHECK-NEXT: xgr %r2, %r0 +; CHECK-NEXT: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %f, + metadata !"fpexcept.strict") + ret i64 %conv +} + +; Test f128->i64. +define i64 @f3(fp128 *%src) { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: ld %f0, 0(%r2) +; CHECK-NEXT: ld %f2, 8(%r2) +; CHECK-NEXT: larl %r1, .LCPI2_0 +; CHECK-NEXT: lxeb %f4, 0(%r1) +; CHECK-NEXT: lxr %f1, %f0 +; CHECK-NEXT: sxbr %f1, %f4 +; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: jl .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lxr %f0, %f1 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: cgxbr %r2, 5, %f0 +; CHECK-NEXT: xgr %r2, %r0 +; CHECK-NEXT: br %r14 + %f = load fp128, fp128 *%src + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i64 %conv +} diff --git a/test/CodeGen/SystemZ/fp-strict-conv-14.ll b/test/CodeGen/SystemZ/fp-strict-conv-14.ll new file mode 100644 index 00000000000..70a02c55799 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-strict-conv-14.ll @@ -0,0 +1,77 @@ +; Test strict conversion of floating-point values to unsigned integers. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) + +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) + +; Test f32->i32. +define i32 @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: clfebr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test f64->i32. +define i32 @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: clfdbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test f128->i32. +define i32 @f3(fp128 *%src) { +; CHECK-LABEL: f3: +; CHECK-DAG: ld %f0, 0(%r2) +; CHECK-DAG: ld %f2, 8(%r2) +; CHECK: clfxbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test f32->i64. +define i64 @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: clgebr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %f, + metadata !"fpexcept.strict") + ret i64 %conv +} + +; Test f64->i64. +define i64 @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: clgdbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %f, + metadata !"fpexcept.strict") + ret i64 %conv +} + +; Test f128->i64. 
+define i64 @f6(fp128 *%src) { +; CHECK-LABEL: f6: +; CHECK-DAG: ld %f0, 0(%r2) +; CHECK-DAG: ld %f2, 8(%r2) +; CHECK: clgxbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i64 %conv +} diff --git a/test/CodeGen/SystemZ/fp-strict-conv-16.ll b/test/CodeGen/SystemZ/fp-strict-conv-16.ll new file mode 100644 index 00000000000..fbbb608ac7c --- /dev/null +++ b/test/CodeGen/SystemZ/fp-strict-conv-16.ll @@ -0,0 +1,63 @@ +; Test f128 floating-point strict conversion to/from integers on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; FIXME: llvm.experimental.constrained.[su]itofp does not yet exist + +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) + +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) + +; Test signed f128->i32. +define i32 @f5(fp128 *%src) { +; CHECK-LABEL: f5: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: cfxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test signed f128->i64. +define i64 @f6(fp128 *%src) { +; CHECK-LABEL: f6: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: cgxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i64 %conv +} + +; Test unsigned f128->i32. 
+define i32 @f7(fp128 *%src) { +; CHECK-LABEL: f7: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: clfxbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i32 %conv +} + +; Test unsigned f128->i64. +define i64 @f8(fp128 *%src) { +; CHECK-LABEL: f8: +; CHECK: vl %v0, 0(%r2) +; CHECK: vrepg %v2, %v0, 1 +; CHECK: clgxbr %r2, 5, %f0, 0 +; CHECK: br %r14 + %f = load fp128, fp128 *%src + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %f, + metadata !"fpexcept.strict") + ret i64 %conv +} diff --git a/test/CodeGen/SystemZ/vec-strict-conv-01.ll b/test/CodeGen/SystemZ/vec-strict-conv-01.ll new file mode 100644 index 00000000000..a5fa0066b14 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-strict-conv-01.ll @@ -0,0 +1,67 @@ +; Test strict conversions between integer and float elements. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; FIXME: llvm.experimental.constrained.[su]itofp does not yet exist + +declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata) + +declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata) +declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata) + +declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata) + +; Test conversion of f64s to signed i64s. 
+define <2 x i64> @f1(<2 x double> %doubles) { +; CHECK-LABEL: f1: +; CHECK: vcgdb %v24, %v24, 0, 5 +; CHECK: br %r14 + %dwords = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %doubles, + metadata !"fpexcept.strict") + ret <2 x i64> %dwords +} + +; Test conversion of f64s to unsigned i64s. +define <2 x i64> @f2(<2 x double> %doubles) { +; CHECK-LABEL: f2: +; CHECK: vclgdb %v24, %v24, 0, 5 +; CHECK: br %r14 + %dwords = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %doubles, + metadata !"fpexcept.strict") + ret <2 x i64> %dwords +} + +; Test conversion of f64s to signed i32s, which must compile. +define void @f5(<2 x double> %doubles, <2 x i32> *%ptr) { + %words = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %doubles, + metadata !"fpexcept.strict") + store <2 x i32> %words, <2 x i32> *%ptr + ret void +} + +; Test conversion of f64s to unsigned i32s, which must compile. +define void @f6(<2 x double> %doubles, <2 x i32> *%ptr) { + %words = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %doubles, + metadata !"fpexcept.strict") + store <2 x i32> %words, <2 x i32> *%ptr + ret void +} + +; Test conversion of f32s to signed i64s, which must compile. +define <2 x i64> @f9(<2 x float> *%ptr) { + %floats = load <2 x float>, <2 x float> *%ptr + %dwords = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %floats, + metadata !"fpexcept.strict") + ret <2 x i64> %dwords +} + +; Test conversion of f32s to unsigned i64s, which must compile. 
+define <2 x i64> @f10(<2 x float> *%ptr) { + %floats = load <2 x float>, <2 x float> *%ptr + %dwords = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %floats, + metadata !"fpexcept.strict") + ret <2 x i64> %dwords +} + diff --git a/test/CodeGen/SystemZ/vec-strict-conv-03.ll b/test/CodeGen/SystemZ/vec-strict-conv-03.ll new file mode 100644 index 00000000000..f42a2b41202 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-strict-conv-03.ll @@ -0,0 +1,29 @@ +; Test strict conversions between integer and float elements on arch13. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +; FIXME: llvm.experimental.constrained.[su]itofp does not yet exist + +declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata) +declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata) + +; Test conversion of f32s to signed i32s. +define <4 x i32> @f1(<4 x float> %floats) { +; CHECK-LABEL: f1: +; CHECK: vcfeb %v24, %v24, 0, 5 +; CHECK: br %r14 + %words = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %floats, + metadata !"fpexcept.strict") + ret <4 x i32> %words +} + +; Test conversion of f32s to unsigned i32s. +define <4 x i32> @f2(<4 x float> %floats) { +; CHECK-LABEL: f2: +; CHECK: vclfeb %v24, %v24, 0, 5 +; CHECK: br %r14 + %words = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %floats, + metadata !"fpexcept.strict") + ret <4 x i32> %words +} + -- 2.50.1