[ARM] Explicit lowering of half <-> double conversions.

author Simon Tatham <simon.tatham@arm.com>

Tue, 25 Jun 2019 11:24:50 +0000 (11:24 +0000)

committer Simon Tatham <simon.tatham@arm.com>

Tue, 25 Jun 2019 11:24:50 +0000 (11:24 +0000)
author Simon Tatham <simon.tatham@arm.com>
Tue, 25 Jun 2019 11:24:50 +0000 (11:24 +0000)
committer Simon Tatham <simon.tatham@arm.com>
Tue, 25 Jun 2019 11:24:50 +0000 (11:24 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index df07152d1767b188f931a033d2c9ce62dad4dfe0..f082c843ccd9817fb8bec47890a84a24323360ad 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -768,9 +768,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
      setOperationAction(ISD::FP_ROUND,   MVT::f32, Custom);
+  }
+
+  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){
      setOperationAction(ISD::FP_EXTEND,  MVT::f64, Custom);
+    setOperationAction(ISD::FP_ROUND,  MVT::f16, Custom);
    }
  
+  if (!Subtarget->hasFP16())
+    setOperationAction(ISD::FP_EXTEND,  MVT::f32, Custom);
+
+  if (!Subtarget->hasFP64())
+    setOperationAction(ISD::FP_ROUND,  MVT::f32, Custom);
+
    computeRegisterProperties(Subtarget->getRegisterInfo());
  
    // ARM does not have floating-point extending loads.
@@ -14415,27 +14425,74 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
  }
  
  SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
-  assert(Op.getValueType() == MVT::f64 && !Subtarget->hasFP64() &&
+  SDValue SrcVal = Op.getOperand(0);
+  const unsigned DstSz = Op.getValueType().getSizeInBits();
+  const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
+  assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
           "Unexpected type for custom-lowering FP_EXTEND");
  
+  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
+         "With both FP DP and 16, any FP conversion is legal!");
+
+  assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
+         "With FP16, 16 to 32 conversion is legal!");
+
+  // Either we are converting 16 -> 64 without FP16, without
+  // fp.double-precision, or without Armv8-fp, in which case the conversion
+  // must be done in two steps (16 -> 32, then 32 -> 64);
+  // or we are converting 32 -> 64 without fp.double-precision, or 16 -> 32
+  // without FP16, in which case we must emit a library call.
+  SDLoc Loc(Op);
    RTLIB::Libcall LC;
-  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+  if (SrcSz == 16) {
+    // Instruction from 16 -> 32
+    if (Subtarget->hasFP16())
+      SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, SrcVal);
+    // Lib call from 16 -> 32
+    else {
+      LC = RTLIB::getFPEXT(MVT::f16, MVT::f32);
+      assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+             "Unexpected type for custom-lowering FP_EXTEND");
+      SrcVal =
+        makeLibCall(DAG, LC, MVT::f32, SrcVal, /*isSigned*/ false, Loc).first;
+    }
+  }
  
-  SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
-                     SDLoc(Op)).first;
+  if (DstSz != 64)
+    return SrcVal;
+  // At this point SrcVal is guaranteed to be 32 bits wide
+  if (Subtarget->hasFP64()) // Instruction from 32 -> 64
+    return DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f64, SrcVal);
+
+  LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Unexpected type for custom-lowering FP_EXTEND");
+  return makeLibCall(DAG, LC, MVT::f64, SrcVal, /*isSigned*/ false, Loc).first;
  }
  
  SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
-  assert(Op.getOperand(0).getValueType() == MVT::f64 && !Subtarget->hasFP64() &&
+  SDValue SrcVal = Op.getOperand(0);
+  EVT SrcVT = SrcVal.getValueType();
+  EVT DstVT = Op.getValueType();
+  const unsigned DstSz = Op.getValueType().getSizeInBits();
+  const unsigned SrcSz = SrcVT.getSizeInBits();
+  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
           "Unexpected type for custom-lowering FP_ROUND");
  
-  RTLIB::Libcall LC;
-  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
+         "With both FP DP and 16, any FP conversion is legal!");
  
-  SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
-                     SDLoc(Op)).first;
+  SDLoc Loc(Op);
+
+  // 32 -> 16 is a single instruction when FP16 is available
+  if (SrcSz == 32 && Subtarget->hasFP16())
+    return Op;
+
+  // Lib call from 32 -> 16 / 64 -> [32, 16]
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Unexpected type for custom-lowering FP_ROUND");
+  return makeLibCall(DAG, LC, DstVT, SrcVal, /*isSigned*/ false, Loc).first;
  }
  
  void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td

index af84157cda532fdd8baf57b39ec8520acce6614d..f640628394417c99c537c1c7139506f45ae8d5b8 100644 (file)
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -714,8 +714,8 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                   Requires<[HasFP16]>,
               Sched<[WriteFPCVT]>;
  
-def : FullFP16Pat<(f32 (fpextend HPR:$Sm)),
-                  (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f32 (fpextend HPR:$Sm)),
+              (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
  def : FP16Pat<(f16_to_fp GPR:$a),
                (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
  
@@ -725,8 +725,8 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                   Requires<[HasFP16]>,
               Sched<[WriteFPCVT]>;
  
-def : FullFP16Pat<(f16 (fpround SPR:$Sm)),
-                  (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+def : FP16Pat<(f16 (fpround SPR:$Sm)),
+              (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
  def : FP16Pat<(fp_to_f16 SPR:$a),
                (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
  
diff --git a/test/CodeGen/ARM/half.ll b/test/CodeGen/ARM/half.ll

index 6759a0576e75c21363db10f7e4e585693ae88539..21ac4f927c6ce3f61aebfa680cc6ec16f8dac986 100644 (file)
--- a/test/CodeGen/ARM/half.ll
+++ b/test/CodeGen/ARM/half.ll
@@ -3,6 +3,10 @@
  ; RUN: llc < %s -mtriple=thumbv8-apple-ios7.0 | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8
  ; RUN: llc < %s -mtriple=armv8r-none-none-eabi | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8
  ; RUN: llc < %s -mtriple=armv8r-none-none-eabi -mattr=-fp64 | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8-SP
+; RUN: llc < %s -mtriple=armv8.1m-none-none-eabi -mattr=+fp-armv8 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V8
+; RUN: llc < %s -mtriple=armv8.1m-none-none-eabi -mattr=+fp-armv8,-fp64 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V8-SP
+; RUN: llc < %s -mtriple=armv8.1m-none-none-eabi -mattr=+mve.fp,+fp64 | FileCheck %s --check-prefix=CHECK-V8
+; RUN: llc < %s -mtriple=armv8.1m-none-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-V8-SP
  
  define void @test_load_store(half* %in, half* %out) {
  ; CHECK-LABEL: test_load_store:
author	Simon Tatham <simon.tatham@arm.com>
	Tue, 25 Jun 2019 11:24:50 +0000 (11:24 +0000)
committer	Simon Tatham <simon.tatham@arm.com>
	Tue, 25 Jun 2019 11:24:50 +0000 (11:24 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrVFP.td		patch \| blob \| history
test/CodeGen/ARM/half.ll		patch \| blob \| history