From 14debde13b44568fbe3fd1a5be49f21e26b455a6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 2 May 2019 14:47:59 +0000 Subject: [PATCH] [SelectionDAG] remove constant folding limitations based on FP exceptions We don't have FP exception limits in the IR constant folder for the binops (apart from strict ops), so it does not make sense to have them here in the DAG either. Nothing else in the backend tries to preserve exceptions (again outside of strict ops), so I don't see how this could have ever worked for real code that cares about FP exceptions. There are still cases (examples: unary opcodes in SDAG, FMA in IR) where we are trying (at least partially) to preserve exceptions without even asking if the target supports FP exceptions. Those should be corrected in subsequent patches. Real support for FP exceptions requires several changes to handle the constrained/strict FP ops. Differential Revision: https://reviews.llvm.org/D61331 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359791 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/TargetLowering.h | 15 ------- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 42 +++++++------------ lib/CodeGen/TargetLoweringBase.cpp | 1 - lib/Target/AMDGPU/SIISelLowering.cpp | 5 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 3 -- test/CodeGen/AArch64/fp-const-fold.ll | 27 ++++-------- 6 files changed, 24 insertions(+), 69 deletions(-) diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index f2eaf457e4b..ba7989de8ac 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -582,11 +582,6 @@ public: return false; } - /// Return true if target supports floating point exceptions. - bool hasFloatingPointExceptions() const { - return HasFloatingPointExceptions; - } - /// Return true if target always beneficiates from combining into FMA for a /// given value type. 
This must typically return false on targets where FMA /// takes more cycles to execute than FADD. @@ -1915,12 +1910,6 @@ protected: /// control. void setJumpIsExpensive(bool isExpensive = true); - /// Tells the code generator that this target supports floating point - /// exceptions and cares about preserving floating point exception behavior. - void setHasFloatingPointExceptions(bool FPExceptions = true) { - HasFloatingPointExceptions = FPExceptions; - } - /// Tells the code generator which bitwidths to bypass. void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; @@ -2580,10 +2569,6 @@ private: /// predication. bool JumpIsExpensive; - /// Whether the target supports or cares about preserving floating point - /// exception behavior. - bool HasFloatingPointExceptions; - /// This target prefers to use _setjmp to implement llvm.setjmp. /// /// Defaults to false. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 86e2fc2d675..b893d3ffeef 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4804,38 +4804,30 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2) { + // TODO: We don't do any constant folding for strict FP opcodes here, but we + // should. That will require dealing with a potentially non-default + // rounding mode, checking the "opStatus" return value from the APFloat + // math calculations, and possibly other variations. 
auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); - bool HasFPExceptions = TLI->hasFloatingPointExceptions(); if (N1CFP && N2CFP) { APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF(); - APFloat::opStatus Status; switch (Opcode) { case ISD::FADD: - Status = C1.add(C2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || Status != APFloat::opInvalidOp) - return getConstantFP(C1, DL, VT); - break; + C1.add(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); case ISD::FSUB: - Status = C1.subtract(C2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || Status != APFloat::opInvalidOp) - return getConstantFP(C1, DL, VT); - break; + C1.subtract(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); case ISD::FMUL: - Status = C1.multiply(C2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || Status != APFloat::opInvalidOp) - return getConstantFP(C1, DL, VT); - break; + C1.multiply(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); case ISD::FDIV: - Status = C1.divide(C2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || Status != APFloat::opInvalidOp) - return getConstantFP(C1, DL, VT); - break; + C1.divide(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); case ISD::FREM: - Status = C1.mod(C2); - if (!HasFPExceptions || Status != APFloat::opInvalidOp) - return getConstantFP(C1, DL, VT); - break; + C1.mod(C2); + return getConstantFP(C1, DL, VT); case ISD::FCOPYSIGN: C1.copySign(C2); return getConstantFP(C1, DL, VT); @@ -5311,10 +5303,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, APFloat V1 = N1CFP->getValueAPF(); const APFloat &V2 = N2CFP->getValueAPF(); const APFloat &V3 = N3CFP->getValueAPF(); - APFloat::opStatus s = - V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); - if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) - return getConstantFP(V1, DL, VT); + V1.fusedMultiplyAdd(V2, V3, 
APFloat::rmNearestTiesToEven); + return getConstantFP(V1, DL, VT); } break; } diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 312579fb14c..13c75fb4ca7 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -545,7 +545,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; EnableExtLdPromotion = false; - HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; BooleanContents = UndefinedBooleanContent; BooleanFloatContents = UndefinedBooleanContent; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index d876acd7eae..7de4dd95e3b 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -729,11 +729,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD); setSchedulingPreference(Sched::RegPressure); - - // SI at least has hardware support for floating point exceptions, but no way - // of using or handling them is implemented. They are also optional in OpenCL - // (Section 7.3) - setHasFloatingPointExceptions(Subtarget->hasFPExceptions()); } const GCNSubtarget *SITargetLowering::getSubtarget() const { diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 3d805344483..41c82ded69c 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -45,9 +45,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setBooleanContents(ZeroOrOneBooleanContent); // Except in SIMD vectors setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - // WebAssembly does not produce floating-point exceptions on normal floating - // point operations. 
- setHasFloatingPointExceptions(false); // We don't know the microarchitecture here, so just reduce register pressure. setSchedulingPreference(Sched::RegPressure); // Tell ISel that we have a stack pointer. diff --git a/test/CodeGen/AArch64/fp-const-fold.ll b/test/CodeGen/AArch64/fp-const-fold.ll index 5fa46aabbe0..dd10ba09fbd 100644 --- a/test/CodeGen/AArch64/fp-const-fold.ll +++ b/test/CodeGen/AArch64/fp-const-fold.ll @@ -18,10 +18,9 @@ define double @constant_fold_fdiv_by_zero(double* %p) { define double @constant_fold_frem_by_zero(double* %p) { ; CHECK-LABEL: constant_fold_frem_by_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #1 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: mov x8, #9221120237041090560 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: b fmod +; CHECK-NEXT: ret %r = frem double 4.940660e-324, 0.0 ret double %r } @@ -31,10 +30,8 @@ define double @constant_fold_frem_by_zero(double* %p) { define double @constant_fold_fmul_nan(double* %p) { ; CHECK-LABEL: constant_fold_fmul_nan: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9218868437227405312 -; CHECK-NEXT: fmov d0, xzr -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: mov x8, #9221120237041090560 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fmul double 0x7ff0000000000000, 0.0 ret double %r @@ -45,11 +42,8 @@ define double @constant_fold_fmul_nan(double* %p) { define double @constant_fold_fadd_nan(double* %p) { ; CHECK-LABEL: constant_fold_fadd_nan: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4503599627370496 -; CHECK-NEXT: mov x9, #9218868437227405312 +; CHECK-NEXT: mov x8, #9221120237041090560 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: ret %r = fadd double 0x7ff0000000000000, 0xfff0000000000000 ret double %r @@ -60,9 +54,8 @@ define double @constant_fold_fadd_nan(double* %p) { define double @constant_fold_fsub_nan(double* %p) { ; CHECK-LABEL: constant_fold_fsub_nan: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9218868437227405312 
+; CHECK-NEXT: mov x8, #9221120237041090560 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fsub d0, d0, d0 ; CHECK-NEXT: ret %r = fsub double 0x7ff0000000000000, 0x7ff0000000000000 ret double %r @@ -73,12 +66,8 @@ define double @constant_fold_fsub_nan(double* %p) { define double @constant_fold_fma_nan(double* %p) { ; CHECK-LABEL: constant_fold_fma_nan: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4631107791820423168 -; CHECK-NEXT: mov x9, #9218868437227405312 -; CHECK-NEXT: fmov d0, xzr -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: fmadd d0, d2, d0, d1 +; CHECK-NEXT: mov x8, #9221120237041090560 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = call double @llvm.fma.f64(double 0x7ff0000000000000, double 0.0, double 42.0) ret double %r -- 2.50.1