From: Ulrich Weigand Date: Tue, 6 Aug 2019 10:43:13 +0000 (+0000) Subject: [Strict FP] Allow custom operation actions X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4dafc035e7566ec207d5c998c683f3d5c89e3deb;p=llvm [Strict FP] Allow custom operation actions This patch changes the DAG legalizer to respect the operation actions set by the target for strict floating-point operations. (Currently, the legalizer will usually fall back to mutate to the non-strict action (which is assumed to be legal), and only skip mutation if the strict operation is marked legal.) With this patch, whenever a strict operation is marked as Legal or Custom, it is passed to the target as usual. Only if it is marked as Expand will the legalizer attempt to mutate to the non-strict operation. Note that this will now fail if the non-strict operation is itself marked as Custom -- the target will have to provide a Custom definition for the strict operation then as well. Reviewed By: hfinkel Differential Revision: https://reviews.llvm.org/D65226 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368012 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index d443ce55d42..fdf435b315c 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -930,6 +930,8 @@ public: return Supported ? Action : Expand; } + // If Op is a strict floating-point operation, return the result + // of getOperationAction for the equivalent non-strict operation. LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { unsigned EqOpc; switch (Op) { @@ -962,14 +964,7 @@ public: case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } - auto Action = getOperationAction(EqOpc, VT); - - // We don't currently handle Custom or Promote for strict FP pseudo-ops. - // For now, we just expand for those cases. 
- if (Action != Legal) - Action = Expand; - - return Action; + return getOperationAction(EqOpc, VT); } /// Return true if the specified operation is legal on this target or can be diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bf817f00f83..e1775d5cb53 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1097,39 +1097,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); - break; case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -2815,6 +2782,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. 
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2829,6 +2802,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -3715,6 +3694,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + if (Results.empty() && Node->isStrictFPOpcode()) { + // FIXME: We were asked to expand a strict floating-point operation, + // but there is currently no expansion implemented that would preserve + // the "strict" properties. For now, we just fall back to the non-strict + // version if that is legal on the target. The actual mutation of the + // operation will happen in SelectionDAGISel::DoInstructionSelection. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + } + // Replace the original node with the legalized result. 
if (Results.empty()) { LLVM_DEBUG(dbgs() << "Cannot expand node\n"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 10b8b705869..09b22215173 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -335,12 +335,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FTRUNC: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // If we're asked to expand a strict vector floating-point operation, + // by default we're going to simply unroll it. That is usually the + // best approach, except in the case where the resulting strict (scalar) + // operations would themselves use the fallback mutation to non-strict. + // In that specific case, just do the fallback on the vector op. 
+ if (Action == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) { + EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (TLI.getOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Legal) + Action = TargetLowering::Legal; + } break; case ISD::ADD: case ISD::SUB: diff --git a/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index a35b6f9356c..0df915a19cc 100644 --- a/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -5447,19 +5447,54 @@ entry: define <2 x double> @constrained_vector_nearbyint_v2f64() { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 64 +; PC64LE-NEXT: .cfi_offset lr, 16 ; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpic 34, 0 +; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: addi 1, 1, 64 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; 
PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 48 +; PC64LE9-NEXT: .cfi_offset lr, 16 ; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 48 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -5568,23 +5603,37 @@ define <3 x double> @constrained_vector_nearby_v3f64() { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 32 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 80 ; PC64LE-NEXT: .cfi_offset lr, 16 +; PC64LE-NEXT: .cfi_offset v31, -16 +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; 
PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpic 2, 0 -; PC64LE-NEXT: xxswapd 0, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: xxlor 1, 63, 63 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -5593,22 +5642,33 @@ define <3 x double> @constrained_vector_nearby_v3f64() { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 32 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 64 ; PC64LE9-NEXT: .cfi_offset lr, 16 +; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 2, 0 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: 
def $f2 killed $f2 killed $vsl2 -; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -5623,28 +5683,92 @@ entry: define <4 x double> @constrained_vector_nearbyint_v4f64() { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 80 +; PC64LE-NEXT: .cfi_offset lr, 16 +; PC64LE-NEXT: .cfi_offset v31, -16 +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l -; PC64LE-NEXT: lxvd2x 1, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvrdpic 34, 0 -; PC64LE-NEXT: xvrdpic 35, 1 +; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE-NEXT: bl 
nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: vmr 2, 31 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 64 +; PC64LE9-NEXT: .cfi_offset lr, 16 +; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l -; PC64LE9-NEXT: xvrdpic 34, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 35, 0 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: vmr 2, 31 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded 
Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( @@ -5706,26 +5830,62 @@ entry: define <2 x double> @constrained_vector_maxnum_v2f64() { ; PC64LE-LABEL: constrained_vector_maxnum_v2f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 64 +; PC64LE-NEXT: .cfi_offset lr, 16 ; PC64LE-NEXT: addis 3, 2, .LCPI86_0@toc@ha ; PC64LE-NEXT: addis 4, 2, .LCPI86_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI86_0@toc@l -; PC64LE-NEXT: addi 4, 4, .LCPI86_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: lxvd2x 1, 0, 4 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvmaxdp 34, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI86_0@toc@l(3) +; PC64LE-NEXT: lfs 2, .LCPI86_1@toc@l(4) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: addis 4, 2, .LCPI86_3@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI86_2@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI86_3@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI86_2@toc@l(3) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: addi 1, 1, 64 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_maxnum_v2f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 48 +; PC64LE9-NEXT: .cfi_offset lr, 16 ; PC64LE9-NEXT: addis 3, 2, .LCPI86_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, 
.LCPI86_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: lfs 1, .LCPI86_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI86_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI86_1@toc@l -; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvmaxdp 34, 1, 0 +; PC64LE9-NEXT: lfs 2, .LCPI86_1@toc@l(3) +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI86_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI86_2@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI86_3@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI86_3@toc@l(3) +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 48 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64( @@ -5854,29 +6014,43 @@ define <3 x double> @constrained_vector_max_v3f64() { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 32 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 80 ; PC64LE-NEXT: .cfi_offset lr, 16 -; PC64LE-NEXT: addis 3, 2, .LCPI88_0@toc@ha +; PC64LE-NEXT: .cfi_offset v31, -16 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: addis 4, 2, .LCPI88_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI88_0@toc@l(3) +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI88_0@toc@ha ; PC64LE-NEXT: lfs 2, .LCPI88_1@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI88_0@toc@l(3) ; PC64LE-NEXT: bl fmax ; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI88_2@toc@ha +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: addis 4, 2, .LCPI88_3@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, 
.LCPI88_2@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI88_3@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI88_2@toc@l(3) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 4, 2, .LCPI88_5@toc@ha +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI88_4@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI88_5@toc@l(4) +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI88_4@toc@l(3) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI88_2@toc@l -; PC64LE-NEXT: addi 4, 4, .LCPI88_3@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: lxvd2x 2, 0, 4 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xvmaxdp 2, 2, 0 -; PC64LE-NEXT: xxswapd 0, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: xxlor 1, 63, 63 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -5885,27 +6059,39 @@ define <3 x double> @constrained_vector_max_v3f64() { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 32 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 64 ; PC64LE9-NEXT: .cfi_offset lr, 16 +; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI88_0@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI88_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI88_1@toc@ha ; PC64LE9-NEXT: lfs 2, .LCPI88_1@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl fmax ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI88_2@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI88_2@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; 
PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI88_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI88_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI88_3@toc@l +; PC64LE9-NEXT: lfs 2, .LCPI88_3@toc@l(3) +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI88_4@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfs 1, .LCPI88_4@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI88_5@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI88_5@toc@l(3) +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvmaxdp 2, 1, 0 -; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -5921,42 +6107,108 @@ entry: define <4 x double> @constrained_vector_maxnum_v4f64() { ; PC64LE-LABEL: constrained_vector_maxnum_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI89_0@toc@ha +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 80 +; PC64LE-NEXT: .cfi_offset lr, 16 +; PC64LE-NEXT: .cfi_offset v31, -16 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: addis 4, 2, .LCPI89_1@toc@ha -; PC64LE-NEXT: addis 5, 2, .LCPI89_2@toc@ha -; PC64LE-NEXT: addis 6, 2, .LCPI89_3@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI89_0@toc@l -; PC64LE-NEXT: addi 4, 4, .LCPI89_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: lxvd2x 1, 0, 4 -; PC64LE-NEXT: addi 3, 5, .LCPI89_2@toc@l -; PC64LE-NEXT: addi 4, 6, .LCPI89_3@toc@l -; PC64LE-NEXT: lxvd2x 2, 0, 3 -; PC64LE-NEXT: lxvd2x 3, 0, 4 -; 
PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xxswapd 3, 3 -; PC64LE-NEXT: xvmaxdp 34, 1, 0 -; PC64LE-NEXT: xvmaxdp 35, 3, 2 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI89_0@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI89_1@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI89_0@toc@l(3) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: addis 4, 2, .LCPI89_3@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI89_2@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI89_3@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI89_2@toc@l(3) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 4, 2, .LCPI89_5@toc@ha +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI89_4@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI89_5@toc@l(4) +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI89_4@toc@l(3) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: addis 4, 2, .LCPI89_7@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI89_6@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI89_7@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI89_6@toc@l(3) +; PC64LE-NEXT: bl fmax +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: vmr 2, 31 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_maxnum_v4f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 
16(1) +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 64 +; PC64LE9-NEXT: .cfi_offset lr, 16 +; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI89_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI89_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: lfs 1, .LCPI89_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI89_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI89_1@toc@l -; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: lfs 2, .LCPI89_1@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI89_2@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI89_2@toc@l -; PC64LE9-NEXT: xvmaxdp 34, 1, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI89_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI89_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI89_3@toc@l -; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvmaxdp 35, 1, 0 +; PC64LE9-NEXT: lfs 2, .LCPI89_3@toc@l(3) +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI89_4@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfs 1, .LCPI89_4@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI89_5@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI89_5@toc@l(3) +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI89_6@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI89_6@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI89_7@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI89_7@toc@l(3) +; PC64LE9-NEXT: bl fmax +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: vmr 2, 31 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed 
$f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64( @@ -6020,26 +6272,62 @@ define <1 x float> @constrained_vector_minnum_v1f32() { define <2 x double> @constrained_vector_minnum_v2f64() { ; PC64LE-LABEL: constrained_vector_minnum_v2f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 64 +; PC64LE-NEXT: .cfi_offset lr, 16 ; PC64LE-NEXT: addis 3, 2, .LCPI91_0@toc@ha ; PC64LE-NEXT: addis 4, 2, .LCPI91_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI91_0@toc@l -; PC64LE-NEXT: addi 4, 4, .LCPI91_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: lxvd2x 1, 0, 4 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvmindp 34, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI91_0@toc@l(3) +; PC64LE-NEXT: lfs 2, .LCPI91_1@toc@l(4) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: addis 4, 2, .LCPI91_3@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI91_2@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI91_3@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI91_2@toc@l(3) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: addi 1, 1, 64 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_minnum_v2f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 48 +; PC64LE9-NEXT: .cfi_offset lr, 16 ; PC64LE9-NEXT: addis 3, 2, .LCPI91_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, 
.LCPI91_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: lfs 1, .LCPI91_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI91_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI91_1@toc@l -; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvmindp 34, 1, 0 +; PC64LE9-NEXT: lfs 2, .LCPI91_1@toc@l(3) +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI91_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI91_2@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI91_3@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI91_3@toc@l(3) +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 48 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64( @@ -6168,29 +6456,43 @@ define <3 x double> @constrained_vector_min_v3f64() { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 32 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 80 ; PC64LE-NEXT: .cfi_offset lr, 16 -; PC64LE-NEXT: addis 3, 2, .LCPI93_0@toc@ha +; PC64LE-NEXT: .cfi_offset v31, -16 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: addis 4, 2, .LCPI93_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI93_0@toc@l(3) +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI93_0@toc@ha ; PC64LE-NEXT: lfs 2, .LCPI93_1@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI93_0@toc@l(3) ; PC64LE-NEXT: bl fmin ; PC64LE-NEXT: nop -; PC64LE-NEXT: addis 3, 2, .LCPI93_2@toc@ha +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; PC64LE-NEXT: addis 4, 2, .LCPI93_3@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, 
.LCPI93_2@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI93_3@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI93_2@toc@l(3) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 4, 2, .LCPI93_5@toc@ha +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI93_4@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI93_5@toc@l(4) +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI93_4@toc@l(3) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI93_2@toc@l -; PC64LE-NEXT: addi 4, 4, .LCPI93_3@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: lxvd2x 2, 0, 4 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xvmindp 2, 2, 0 -; PC64LE-NEXT: xxswapd 0, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: xxlor 1, 63, 63 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -6199,27 +6501,39 @@ define <3 x double> @constrained_vector_min_v3f64() { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 32 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 64 ; PC64LE9-NEXT: .cfi_offset lr, 16 +; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI93_0@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI93_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI93_1@toc@ha ; PC64LE9-NEXT: lfs 2, .LCPI93_1@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl fmin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI93_2@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI93_2@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; 
PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI93_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI93_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI93_3@toc@l +; PC64LE9-NEXT: lfs 2, .LCPI93_3@toc@l(3) +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI93_4@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfs 1, .LCPI93_4@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI93_5@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI93_5@toc@l(3) +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvmindp 2, 1, 0 -; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -6235,42 +6549,108 @@ entry: define <4 x double> @constrained_vector_minnum_v4f64() { ; PC64LE-LABEL: constrained_vector_minnum_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI94_0@toc@ha +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 80 +; PC64LE-NEXT: .cfi_offset lr, 16 +; PC64LE-NEXT: .cfi_offset v31, -16 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: addis 4, 2, .LCPI94_1@toc@ha -; PC64LE-NEXT: addis 5, 2, .LCPI94_2@toc@ha -; PC64LE-NEXT: addis 6, 2, .LCPI94_3@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI94_0@toc@l -; PC64LE-NEXT: addi 4, 4, .LCPI94_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: lxvd2x 1, 0, 4 -; PC64LE-NEXT: addi 3, 5, .LCPI94_2@toc@l -; PC64LE-NEXT: addi 4, 6, .LCPI94_3@toc@l -; PC64LE-NEXT: lxvd2x 2, 0, 3 -; PC64LE-NEXT: lxvd2x 3, 0, 4 -; 
PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xxswapd 3, 3 -; PC64LE-NEXT: xvmindp 34, 1, 0 -; PC64LE-NEXT: xvmindp 35, 3, 2 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI94_0@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI94_1@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI94_0@toc@l(3) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: addis 4, 2, .LCPI94_3@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI94_2@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI94_3@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI94_2@toc@l(3) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 4, 2, .LCPI94_5@toc@ha +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI94_4@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI94_5@toc@l(4) +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfs 1, .LCPI94_4@toc@l(3) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: addis 4, 2, .LCPI94_7@toc@ha +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI94_6@toc@ha +; PC64LE-NEXT: lfs 2, .LCPI94_7@toc@l(4) +; PC64LE-NEXT: lfs 1, .LCPI94_6@toc@l(3) +; PC64LE-NEXT: bl fmin +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: vmr 2, 31 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_minnum_v4f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 
16(1) +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 64 +; PC64LE9-NEXT: .cfi_offset lr, 16 +; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI94_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI94_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: lfs 1, .LCPI94_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI94_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI94_1@toc@l -; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: lfs 2, .LCPI94_1@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI94_2@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI94_2@toc@l -; PC64LE9-NEXT: xvmindp 34, 1, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI94_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI94_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI94_3@toc@l -; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvmindp 35, 1, 0 +; PC64LE9-NEXT: lfs 2, .LCPI94_3@toc@l(3) +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI94_4@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfs 1, .LCPI94_4@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI94_5@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI94_5@toc@l(3) +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI94_6@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI94_6@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI94_7@toc@ha +; PC64LE9-NEXT: lfs 2, .LCPI94_7@toc@l(3) +; PC64LE9-NEXT: bl fmin +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: vmr 2, 31 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed 
$f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64( diff --git a/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index ec564d13558..c0d7252ddb7 100644 --- a/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -544,14 +544,19 @@ entry: define <2 x double> @constrained_vector_fadd_v2f64() { ; CHECK-LABEL: constrained_vector_fadd_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] -; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: addsd %xmm0, %xmm1 +; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq entry: %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( @@ -598,22 +603,24 @@ entry: define <3 x double> @constrained_vector_fadd_v3f64() { ; CHECK-LABEL: constrained_vector_fadd_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] -; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: xorpd %xmm1, %xmm1 +; CHECK-NEXT: xorpd %xmm2, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: addsd %xmm1, %xmm2 +; CHECK-NEXT: movsd 
{{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: addsd %xmm1, %xmm0 ; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v3f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -629,16 +636,28 @@ entry: define <4 x double> @constrained_vector_fadd_v4f64() { ; CHECK-LABEL: constrained_vector_fadd_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0000000000000001E-1] -; CHECK-NEXT: addpd %xmm1, %xmm0 -; CHECK-NEXT: addpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: addsd %xmm1, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: addsd %xmm1, %xmm2 +; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v4f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} ymm0 = 
[1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-NEXT: retq entry: %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64( @@ -675,14 +694,19 @@ entry: define <2 x double> @constrained_vector_fsub_v2f64() { ; CHECK-LABEL: constrained_vector_fsub_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( @@ -732,12 +756,12 @@ define <3 x double> @constrained_vector_fsub_v3f64() { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; CHECK-NEXT: subpd 
{{.*}}(%rip), %xmm0 -; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; CHECK-NEXT: movapd %xmm1, %xmm2 +; CHECK-NEXT: subsd %xmm0, %xmm2 +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; @@ -746,8 +770,9 @@ define <3 x double> @constrained_vector_fsub_v3f64() { ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -763,16 +788,28 @@ entry: define <4 x double> @constrained_vector_fsub_v4f64() { ; CHECK-LABEL: constrained_vector_fsub_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: movapd %xmm1, %xmm2 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 ; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: movapd %xmm1, %xmm2 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 +; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v4f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] -; 
AVX-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 +; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-NEXT: retq entry: %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(