From 8e4da35c4ae33b1b47ef68f731adfeb1c940a678 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 11 Apr 2019 14:21:57 +0000 Subject: [PATCH] [DAGCombiner][x86] scalarize inserted vector FP ops // bo (build_vec ...undef, x, undef...), (build_vec ...undef, y, undef...) --> // build_vec ...undef, (bo x, y), undef... The lifetime of the nodes in these examples is different for variables versus constants, but they are all build vectors briefly, so I'm proposing to catch them in this form to handle all of the leading examples in the motivating test file. Before we have build vectors, we might have insert_vector_element. After that, we might have scalar_to_vector and constant pool loads. It's going to take more work to ensure that FP vector operands are getting simplified with undef elements, so this transform can apply more widely. In a non-loose FP environment, we are likely simplifying FP elements to NaN values rather than undefs. We also need to allow more opcodes down this path. Eg, we don't handle FP min/max flavors yet. Differential Revision: https://reviews.llvm.org/D60514 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358172 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 58 ++++++++++++ test/CodeGen/X86/scalarize-fp.ll | 112 ++++++++--------------- 2 files changed, 98 insertions(+), 72 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c61b24f8d21..6424f30b310 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18675,6 +18675,61 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); } +/// If a vector binop is performed on build vector operands that only have one +/// non-undef element, it may be profitable to extract, scalarize, and insert. +static SDValue scalarizeBinOpOfBuildVectors(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getOpcode() != ISD::BUILD_VECTOR || N0.getOpcode() != N1.getOpcode()) + return SDValue(); + + // Return the index of exactly one scalar element in an otherwise undefined + // build vector. + auto getScalarIndex = [](SDValue V) { + int NotUndefIndex = -1; + for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (V.getOperand(i).isUndef()) + continue; + // There can be only one. + if (NotUndefIndex >= 0) + return -1; + // This might be the only non-undef operand. + NotUndefIndex = i; + } + return NotUndefIndex; + }; + int N0Index = getScalarIndex(N0); + if (N0Index == -1) + return SDValue(); + int N1Index = getScalarIndex(N1); + if (N1Index == -1) + return SDValue(); + + SDValue X = N0.getOperand(N0Index); + SDValue Y = N1.getOperand(N1Index); + EVT ScalarVT = X.getValueType(); + if (ScalarVT != Y.getValueType()) + return SDValue(); + + // TODO: Remove/replace the extract cost check? If the elements are available + // as scalars, then there may be no extract cost. Should we ask if + // inserting a scalar back into a vector is cheap instead? + EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (N0Index != N1Index || !TLI.isExtractVecEltCheap(VT, N0Index) || + !TLI.isOperationLegalOrCustom(N->getOpcode(), ScalarVT)) + return SDValue(); + + // bo (build_vec ...undef, x, undef...), (build_vec ...undef, y, undef...) --> + // build_vec ...undef, (bo x, y), undef... 
+ SDValue ScalarBO = DAG.getNode(N->getOpcode(), SDLoc(N), ScalarVT, X, Y, + N->getFlags()); + SmallVector Ops(N0.getNumOperands(), DAG.getUNDEF(ScalarVT)); + Ops[N0Index] = ScalarBO; + return DAG.getBuildVector(VT, SDLoc(N), Ops); +} + /// Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { assert(N->getValueType(0).isVector() && @@ -18737,6 +18792,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } + if (SDValue V = scalarizeBinOpOfBuildVectors(N, DAG)) + return V; + return SDValue(); } diff --git a/test/CodeGen/X86/scalarize-fp.ll b/test/CodeGen/X86/scalarize-fp.ll index fb665e928e2..40eed1ff38c 100644 --- a/test/CodeGen/X86/scalarize-fp.ll +++ b/test/CodeGen/X86/scalarize-fp.ll @@ -5,14 +5,12 @@ define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind { ; SSE-LABEL: fadd_op1_constant_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: addps %xmm1, %xmm0 +; SSE-NEXT: addss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fadd_op1_constant_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %x, i32 0 %b = fadd <4 x float> %v, @@ -22,16 +20,14 @@ define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind { define <4 x float> @load_fadd_op1_constant_v4f32(float* %p) nounwind { ; SSE-LABEL: load_fadd_op1_constant_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: addps %xmm1, %xmm0 +; SSE-NEXT: addss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fadd_op1_constant_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load float, float* %p %v = insertelement <4 x float> undef, float %x, i32 0 @@ -43,14 +39,14 @@ define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind { ; SSE-LABEL: fsub_op0_constant_v4f32: ; SSE: # %bb.0: ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: subps %xmm0, %xmm1 +; SSE-NEXT: subss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fsub_op0_constant_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %x, i32 0 %b = fsub <4 x float> , %v @@ -60,16 +56,14 @@ define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind { define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind { ; SSE-LABEL: load_fsub_op0_constant_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: subps %xmm1, %xmm0 +; SSE-NEXT: subss (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fsub_op0_constant_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load float, float* %p %v = insertelement <4 x float> undef, float %x, i32 0 @@ -80,14 +74,12 @@ define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind { 
define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind { ; SSE-LABEL: fmul_op1_constant_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: mulps %xmm1, %xmm0 +; SSE-NEXT: mulss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fmul_op1_constant_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %x, i32 0 %b = fmul <4 x float> %v, @@ -97,16 +89,14 @@ define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind { define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind { ; SSE-LABEL: load_fmul_op1_constant_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: mulps %xmm1, %xmm0 +; SSE-NEXT: mulss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fmul_op1_constant_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load float, float* %p %v = insertelement <4 x float> undef, float %x, i32 0 @@ -117,14 +107,12 @@ define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind { define <4 x float> @fdiv_op1_constant_v4f32(float %x) nounwind { ; SSE-LABEL: fdiv_op1_constant_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: divps %xmm1, %xmm0 +; SSE-NEXT: divss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fdiv_op1_constant_v4f32: ; AVX: # %bb.0: -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %x, i32 0 %b = fdiv <4 x float> %v, @@ -135,15 +123,13 @@ define <4 x float> @load_fdiv_op1_constant_v4f32(float* %p) nounwind { ; SSE-LABEL: load_fdiv_op1_constant_v4f32: ; SSE: # %bb.0: ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: divps %xmm1, %xmm0 +; SSE-NEXT: divss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fdiv_op1_constant_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load float, float* %p %v = insertelement <4 x float> undef, float %x, i32 0 @@ -155,14 +141,14 @@ define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind { ; SSE-LABEL: fdiv_op0_constant_v4f32: ; SSE: # %bb.0: ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: divps %xmm0, %xmm1 +; SSE-NEXT: divss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fdiv_op0_constant_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %x, i32 0 %b = fdiv <4 x float> , %v @@ -172,16 +158,14 @@ define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind { define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind { ; SSE-LABEL: load_fdiv_op0_constant_v4f32: ; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} 
xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: divps %xmm1, %xmm0 +; SSE-NEXT: divss (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fdiv_op0_constant_v4f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load float, float* %p %v = insertelement <4 x float> undef, float %x, i32 0 @@ -192,14 +176,12 @@ define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind { define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind { ; SSE-LABEL: fadd_op1_constant_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: addpd %xmm1, %xmm0 +; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fadd_op1_constant_v4f64: ; AVX: # %bb.0: -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x double> undef, double %x, i32 0 %b = fadd <4 x double> %v, @@ -209,16 +191,14 @@ define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind { define <4 x double> @load_fadd_op1_constant_v4f64(double* %p) nounwind { ; SSE-LABEL: load_fadd_op1_constant_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: addpd %xmm1, %xmm0 +; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fadd_op1_constant_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load double, double* %p %v = insertelement <4 x double> undef, double %x, i32 0 @@ -230,14 +210,14 @@ define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind { ; SSE-LABEL: fsub_op0_constant_v4f64: ; SSE: # %bb.0: ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: subpd %xmm0, %xmm1 +; SSE-NEXT: subsd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fsub_op0_constant_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x double> undef, double %x, i32 0 %b = fsub <4 x double> , %v @@ -247,16 +227,14 @@ define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind { define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind { ; SSE-LABEL: load_fsub_op0_constant_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: subpd %xmm1, %xmm0 +; SSE-NEXT: subsd (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fsub_op0_constant_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load double, double* %p %v = insertelement <4 x double> undef, double %x, i32 0 @@ -267,14 +245,12 @@ define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind { define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind { ; SSE-LABEL: fmul_op1_constant_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: mulpd %xmm1, %xmm0 +; SSE-NEXT: mulsd 
{{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fmul_op1_constant_v4f64: ; AVX: # %bb.0: -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x double> undef, double %x, i32 0 %b = fmul <4 x double> %v, @@ -284,16 +260,14 @@ define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind { define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind { ; SSE-LABEL: load_fmul_op1_constant_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: mulpd %xmm1, %xmm0 +; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fmul_op1_constant_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load double, double* %p %v = insertelement <4 x double> undef, double %x, i32 0 @@ -304,14 +278,12 @@ define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind { define <4 x double> @fdiv_op1_constant_v4f64(double %x) nounwind { ; SSE-LABEL: fdiv_op1_constant_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: divpd %xmm1, %xmm0 +; SSE-NEXT: divsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fdiv_op1_constant_v4f64: ; AVX: # %bb.0: -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x double> undef, double %x, i32 0 %b = fdiv <4 x double> %v, @@ -322,15 +294,13 @@ define <4 x double> @load_fdiv_op1_constant_v4f64(double* %p) nounwind { ; SSE-LABEL: load_fdiv_op1_constant_v4f64: ; SSE: # %bb.0: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: divpd %xmm1, %xmm0 +; SSE-NEXT: divsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fdiv_op1_constant_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load double, double* %p %v = insertelement <4 x double> undef, double %x, i32 0 @@ -342,14 +312,14 @@ define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind { ; SSE-LABEL: fdiv_op0_constant_v4f64: ; SSE: # %bb.0: ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: divpd %xmm0, %xmm1 +; SSE-NEXT: divsd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fdiv_op0_constant_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x double> undef, double %x, i32 0 %b = fdiv <4 x double> , %v @@ -359,16 +329,14 @@ define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind { define <4 x double> @load_fdiv_op0_constant_v4f64(double* %p) nounwind { ; SSE-LABEL: load_fdiv_op0_constant_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: divpd %xmm1, %xmm0 +; SSE-NEXT: divsd (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: load_fdiv_op0_constant_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vdivpd 
%xmm0, %xmm1, %xmm0 +; AVX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %x = load double, double* %p %v = insertelement <4 x double> undef, double %x, i32 0 -- 2.50.1
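
For reference, here is a minimal IR example of the pattern this combine targets, written in the style of the test file above. The function name and the 42.0 constant are illustrative assumptions (the vector constant operands in the diff hunks above are truncated), not values taken verbatim from the test:

define <4 x float> @fadd_op1_constant_example(float %x) nounwind {
  ; Lane 0 is the only defined lane of %v; the assumed constant operand
  ; likewise has a real value only in lane 0.
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

Both fadd operands are briefly build_vector nodes with exactly one non-undef element at the same index, so scalarizeBinOpOfBuildVectors can apply the fadd to the two lane-0 scalars and rebuild the vector around the result. That is what lets the x86 backend emit a single addss against a constant-pool load instead of materializing a full vector and using addps, as reflected in the updated CHECK lines above.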