[ARM] Stop using scalar FP instructions in integer-only MVE mode.

author Simon Tatham <simon.tatham@arm.com>
Tue, 2 Jul 2019 11:26:00 +0000 (11:26 +0000)
committer Simon Tatham <simon.tatham@arm.com>
Tue, 2 Jul 2019 11:26:00 +0000 (11:26 +0000)

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 632ee004c9f18389b9e8ea4b0d74dcca0ba4a238..36c783c2e650206eab6fb7f7dc40e62895e6df0d 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -224,6 +224,13 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  void ARMTargetLowering::setAllExpand(MVT VT) {
    for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
      setOperationAction(Opc, VT, Expand);
+
+  // We support these really simple operations even on types where all
+  // the actual arithmetic has to be broken down into simpler
+  // operations or turned into library calls.
+  setOperationAction(ISD::BITCAST, VT, Legal);
+  setOperationAction(ISD::LOAD, VT, Legal);
+  setOperationAction(ISD::STORE, VT, Legal);
  }
  
  void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
@@ -262,9 +269,6 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
  
      if (HasMVEFP) {
        // No native support for these.
@@ -289,9 +293,6 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
    for (auto VT : LongTypes) {
      addRegisterClass(VT, &ARM::QPRRegClass);
      setAllExpand(VT);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
    }
  
    // It is legal to extload from v4i8 to v4i16 or v4i32.
@@ -594,10 +595,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
    else
      addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  
-  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
-      !Subtarget->isThumb1Only()) {
+  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
+      Subtarget->hasFPRegs()) {
      addRegisterClass(MVT::f32, &ARM::SPRRegClass);
      addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+    if (!Subtarget->hasVFP2Base())
+      setAllExpand(MVT::f32);
+    if (!Subtarget->hasFP64())
+      setAllExpand(MVT::f64);
    }
  
    if (Subtarget->hasFullFP16()) {
@@ -4544,6 +4549,16 @@ static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
    return false;
  }
  
+bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
+  if (VT == MVT::f32)
+    return !Subtarget->hasVFP2Base();
+  if (VT == MVT::f64)
+    return !Subtarget->hasFP64();
+  if (VT == MVT::f16)
+    return !Subtarget->hasFullFP16();
+  return false;
+}
+
  SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
    SDLoc dl(Op);
@@ -4587,9 +4602,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
    SDValue TrueVal = Op.getOperand(2);
    SDValue FalseVal = Op.getOperand(3);
  
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
  
      // If softenSetCCOperands only returned one value, we should compare it to
      // zero.
@@ -4828,9 +4843,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
    SDValue Dest = Op.getOperand(4);
    SDLoc dl(Op);
  
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
  
      // If softenSetCCOperands only returned one value, we should compare it to
      // zero.
@@ -4975,7 +4990,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
    if (VT.isVector())
      return LowerVectorFP_TO_INT(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getOperand(0).getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
      RTLIB::Libcall LC;
      if (Op.getOpcode() == ISD::FP_TO_SINT)
        LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -5039,7 +5054,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
    if (VT.isVector())
      return LowerVectorINT_TO_FP(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(VT)) {
      RTLIB::Libcall LC;
      if (Op.getOpcode() == ISD::SINT_TO_FP)
        LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h

index e79144d91b7554f9138bf00a67dbf49701697230..ca8b042c56be8c34b680bf15a0efcd39bf079ad9 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -794,6 +794,8 @@ class VectorType;
  
      bool shouldConsiderGEPOffsetSplit() const override { return true; }
  
+    bool isUnsupportedFloatingType(EVT VT) const;
+
      SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                      SDValue ARMcc, SDValue CCR, SDValue Cmp,
                      SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td

index f640628394417c99c537c1c7139506f45ae8d5b8..ea31e631d3a09aae6fff561973d03e9bf1580a88 100644 (file)
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -2269,13 +2269,13 @@ def VMOVDcc  : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
                      IIC_fpUNA64,
                      [(set (f64 DPR:$Dd),
                            (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
-               RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+               RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
  
  def VMOVScc  : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
                      IIC_fpUNA32,
                      [(set (f32 SPR:$Sd),
                            (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
-               RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
+               RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
  } // hasSideEffects
  
  //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/ARM/fp16-instructions.ll b/test/CodeGen/ARM/fp16-instructions.ll

index e27631ce164293b618c70fff613365072211f9ea..a8fc532070e0dbcff5deb7dbd18e6df82e7104d6 100644 (file)
--- a/test/CodeGen/ARM/fp16-instructions.ll
+++ b/test/CodeGen/ARM/fp16-instructions.ll
@@ -1,6 +1,8 @@
  ; SOFT:
  ; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft     | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
  ; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft   | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -float-abi=soft -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
  
  ; SOFTFP:
  ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
@@ -206,8 +208,8 @@ for.end:
  
  ; CHECK-LABEL:            VCMPBRCC:
  
-; CHECK-SOFT:             bl  __aeabi_fcmpgt
-; CHECK-SOFT:             cmp r0, #0
+; CHECK-SOFT:             bl  __aeabi_fcmp{{gt|le}}
+; CHECK-SOFT:             cmp r0, #{{0|1}}
  
  ; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
  ; CHECK-SOFTFP-FP16:      vcmpe.f32 [[S2]], s0
diff --git a/test/CodeGen/Thumb2/float-ops.ll b/test/CodeGen/Thumb2/float-ops.ll

index 3c29b4d69aef99846f365203f177243ad4ef1411..cbd8ca557a06205fc325fc6980618be61539d2ea 100644 (file)
--- a/test/CodeGen/Thumb2/float-ops.ll
+++ b/test/CodeGen/Thumb2/float-ops.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE -check-prefix=NOREGS
  ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VFP4-ALL
  ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=FP-ARMv8
  ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP4-ALL -check-prefix=VFP4-DP
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabihf -mattr=+mve | FileCheck %s -check-prefix=CHECK -check-prefix=NONE -check-prefix=ONLYREGS
  
  define float @add_f(float %a, float %b) {
  entry:
  ; CHECK-LABEL: add_f:
-; NONE: bl __aeabi_fadd
+; NONE: {{b|bl}} __aeabi_fadd
  ; HARD: vadd.f32  s0, s0, s1
    %0 = fadd float %a, %b
    ret float %0
@@ -15,8 +16,8 @@ entry:
  define double @add_d(double %a, double %b) {
  entry:
  ; CHECK-LABEL: add_d:
-; NONE: bl __aeabi_dadd
-; SP: bl __aeabi_dadd
+; NONE: {{b|bl}} __aeabi_dadd
+; SP: {{b|bl}} __aeabi_dadd
  ; DP: vadd.f64  d0, d0, d1
    %0 = fadd double %a, %b
    ret double %0
@@ -25,7 +26,7 @@ entry:
  define float @sub_f(float %a, float %b) {
  entry:
  ; CHECK-LABEL: sub_f:
-; NONE: bl __aeabi_fsub
+; NONE: {{b|bl}} __aeabi_fsub
  ; HARD: vsub.f32  s
    %0 = fsub float %a, %b
    ret float %0
@@ -34,8 +35,8 @@ entry:
  define double @sub_d(double %a, double %b) {
  entry:
  ; CHECK-LABEL: sub_d:
-; NONE: bl __aeabi_dsub
-; SP: bl __aeabi_dsub
+; NONE: {{b|bl}} __aeabi_dsub
+; SP: {{b|bl}} __aeabi_dsub
  ; DP: vsub.f64  d0, d0, d1
    %0 = fsub double %a, %b
    ret double %0
@@ -44,7 +45,7 @@ entry:
  define float @mul_f(float %a, float %b) {
  entry:
  ; CHECK-LABEL: mul_f:
-; NONE: bl __aeabi_fmul
+; NONE: {{b|bl}} __aeabi_fmul
  ; HARD: vmul.f32  s
    %0 = fmul float %a, %b
    ret float %0
@@ -53,8 +54,8 @@ entry:
  define double @mul_d(double %a, double %b) {
  entry:
  ; CHECK-LABEL: mul_d:
-; NONE: bl __aeabi_dmul
-; SP: bl __aeabi_dmul
+; NONE: {{b|bl}} __aeabi_dmul
+; SP: {{b|bl}} __aeabi_dmul
  ; DP: vmul.f64  d0, d0, d1
    %0 = fmul double %a, %b
    ret double %0
@@ -63,7 +64,7 @@ entry:
  define float @div_f(float %a, float %b) {
  entry:
  ; CHECK-LABEL: div_f:
-; NONE: bl __aeabi_fdiv
+; NONE: {{b|bl}} __aeabi_fdiv
  ; HARD: vdiv.f32  s
    %0 = fdiv float %a, %b
    ret float %0
@@ -72,8 +73,8 @@ entry:
  define double @div_d(double %a, double %b) {
  entry:
  ; CHECK-LABEL: div_d:
-; NONE: bl __aeabi_ddiv
-; SP: bl __aeabi_ddiv
+; NONE: {{b|bl}} __aeabi_ddiv
+; SP: {{b|bl}} __aeabi_ddiv
  ; DP: vdiv.f64  d0, d0, d1
    %0 = fdiv double %a, %b
    ret double %0
@@ -109,7 +110,8 @@ entry:
  define double @load_d(double* %a) {
  entry:
  ; CHECK-LABEL: load_d:
-; NONE: ldm r0, {r0, r1}
+; NOREGS: ldm r0, {r0, r1}
+; ONLYREGS: vldr d0, [r0]
  ; HARD: vldr d0, [r0]
    %0 = load double, double* %a, align 8
    ret double %0
@@ -127,7 +129,8 @@ entry:
  define void @store_d(double* %a, double %b) {
  entry:
  ; CHECK-LABEL: store_d:
-; NONE: strd r2, r3, [r0]
+; NOREGS: strd r2, r3, [r0]
+; ONLYREGS: vstr d0, [r0]
  ; HARD: vstr d0, [r0]
    store double %b, double* %a, align 8
    ret void
@@ -259,8 +262,10 @@ define i64 @bitcast_d_to_i(double %a) {
  
  define float @select_f(float %a, float %b, i1 %c) {
  ; CHECK-LABEL: select_f:
-; NONE: lsls    r2, r2, #31
-; NONE: moveq   r0, r1
+; NOREGS: lsls    r2, r2, #31
+; NOREGS: moveq   r0, r1
+; ONLYREGS: lsls    r2, r2, #31
+; ONLYREGS: vmovne.f32      s2, s0
  ; HARD: lsls    r0, r0, #31
  ; VFP4-ALL: vmovne.f32      s1, s0
  ; VFP4-ALL: vmov.f32        s0, s1
@@ -273,8 +278,8 @@ define double @select_d(double %a, double %b, i1 %c) {
  ; CHECK-LABEL: select_d:
  ; NONE: ldr{{(.w)?}}     [[REG:r[0-9]+]], [sp]
  ; NONE  ands    [[REG]], [[REG]], #1
-; NONE: moveq   r0, r2
-; NONE: moveq   r1, r3
+; NONE-DAG: moveq   r0, r2
+; NONE-DAG: moveq   r1, r3
  ; SP: ands r0, r0, #1
  ; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
  ; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1
author	Simon Tatham <simon.tatham@arm.com>
	Tue, 2 Jul 2019 11:26:00 +0000 (11:26 +0000)
committer	Simon Tatham <simon.tatham@arm.com>
	Tue, 2 Jul 2019 11:26:00 +0000 (11:26 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMISelLowering.h		patch \| blob \| history
lib/Target/ARM/ARMInstrVFP.td		patch \| blob \| history
test/CodeGen/ARM/fp16-instructions.ll		patch \| blob \| history
test/CodeGen/Thumb2/float-ops.ll		patch \| blob \| history