From: Simon Pilgrim Date: Wed, 28 Jun 2017 17:40:20 +0000 (+0000) Subject: [BBVectorize] Regenerate simple tests X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=881a07c421beb9729d3cc0d0395453649703d8e3;p=llvm [BBVectorize] Regenerate simple tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306571 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll index b7f87fe1db0..dd5e90841a7 100644 --- a/test/Transforms/BBVectorize/simple-int.ll +++ b/test/Transforms/BBVectorize/simple-int.ll @@ -1,6 +1,8 @@ -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + declare double @llvm.fma.f64(double, double, double) declare double @llvm.fmuladd.f64(double, double, double) declare double @llvm.cos.f64(double) @@ -20,473 +22,479 @@ declare i64 @llvm.cttz.i64(i64, i1) ; Basic depth-3 chain with fma define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1) - %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test1( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, 
double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0 -; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1 -; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0 +; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1 +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1) + %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2) + %Z1 = fadd 
double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with fmuladd define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1) - %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test1a( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0 -; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1 -; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0 
+; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1 +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1) + %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with cos define double @test2(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.cos.f64(double %X1) - %Y2 = call double @llvm.cos.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test2( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double 
[[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.cos.f64(double %X1) + %Y2 = call double @llvm.cos.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with powi define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) { - - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) - %Y2 = call double @llvm.powi.f64(double %X2, i32 %P) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test3( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, 
i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.powi.v2f64(<2 x double> [[X1]], i32 [[P:%.*]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) + %Y2 = call double @llvm.powi.f64(double %X2, i32 %P) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with powi (different powers: should not vectorize) define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) { - - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %P2 = add i32 %P, 1 - %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) - %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test4( -; CHECK-NOT: <2 x double> -; CHECK: ret double %R +; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]] +; CHECK-NEXT: [[P2:%.*]] = add i32 [[P:%.*]], 1 +; 
CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P]]) +; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]] +; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]] +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %P2 = add i32 %P, 1 + %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) + %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with round define double @testround(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.round.f64(double %X1) - %Y2 = call double @llvm.round.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testround -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testround( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x 
double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.round.f64(double %X1) + %Y2 = call double @llvm.round.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with copysign define double @testcopysign(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.copysign.f64(double %X1, double %A1) - %Y2 = call double @llvm.copysign.f64(double %X2, double %A1) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testcopysign -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1.v.i1.2 = shufflevector <2 x double> %X1.v.i0.1, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: 
%Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testcopysign( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1_V_I1_2:%.*]] = shufflevector <2 x double> [[X1_V_I0_1]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[X1]], <2 x double> [[Y1_V_I1_2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.copysign.f64(double %X1, double %A1) + %Y2 = call double @llvm.copysign.f64(double %X2, double %A1) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with ceil define double @testceil(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.ceil.f64(double %X1) - %Y2 = call double @llvm.ceil.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testceil -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testceil( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.ceil.f64(double %X1) + %Y2 = call double @llvm.ceil.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with nearbyint define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.nearbyint.f64(double %X1) - %Y2 = call double @llvm.nearbyint.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd 
double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testnearbyint -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testnearbyint( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.nearbyint.f64(double %X1) + %Y2 = call double @llvm.nearbyint.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with rint define double 
@testrint(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.rint.f64(double %X1) - %Y2 = call double @llvm.rint.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testrint -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testrint( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double 
@llvm.rint.f64(double %X1) + %Y2 = call double @llvm.rint.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with trunc define double @testtrunc(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.trunc.f64(double %X1) - %Y2 = call double @llvm.trunc.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testtrunc -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testtrunc( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = 
extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.trunc.f64(double %X1) + %Y2 = call double @llvm.trunc.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with floor define double @testfloor(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.floor.f64(double %X1) - %Y2 = call double @llvm.floor.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testfloor -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testfloor( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x 
double> @llvm.floor.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.floor.f64(double %X1) + %Y2 = call double @llvm.floor.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with fabs define double @testfabs(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.fabs.f64(double %X1) - %Y2 = call double @llvm.fabs.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testfabs -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testfabs( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; 
CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.fabs.f64(double %X1) + %Y2 = call double @llvm.fabs.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with bswap define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.bswap.i64(i64 %X1) - %Y2 = call i64 @llvm.bswap.i64(i64 %X2) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testbswap -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R - +; CHECK-LABEL: @testbswap( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 
[[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.bswap.i64(i64 %X1) + %Y2 = call i64 @llvm.bswap.i64(i64 %X2) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with ctpop define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.ctpop.i64(i64 %X1) - %Y2 = call i64 @llvm.ctpop.i64(i64 %X2) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testctpop -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R - +; CHECK-LABEL: @testctpop( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = 
insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.ctpop.i64(i64 %X1) + %Y2 = call i64 @llvm.ctpop.i64(i64 %X2) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with ctlz define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testctlz -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testctlz( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 
[[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[X1]], i1 true) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with ctlz define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testctlzneg -; CHECK: %X1 = sub i64 %A1, %B1 -; CHECK: %X2 = sub i64 %A2, %B2 -; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) -; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false) -; CHECK: %Z1 = add i64 %Y1, %B1 -; CHECK: %Z2 = add i64 %Y2, %B2 -; CHECK: %R = mul i64 %Z1, %Z2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testctlzneg( +; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]] +; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X1]], i1 true), !range !0 +; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X2]], i1 false), !range !0 +; CHECK-NEXT: [[Z1:%.*]] = 
add i64 [[Y1]], [[B1]] +; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]] +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with cttz define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testcttz -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testcttz( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X1]], i1 true) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; 
CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with cttz define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testcttzneg -; CHECK: %X1 = sub i64 %A1, %B1 -; CHECK: %X2 = sub i64 %A2, %B2 -; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) -; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false) -; CHECK: %Z1 = add i64 %Y1, %B1 -; CHECK: %Z2 = add i64 %Y2, %B2 -; CHECK: %R = mul i64 %Z1, %Z2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testcttzneg( +; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]] +; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.cttz.i64(i64 [[X1]], i1 true), !range !0 +; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.cttz.i64(i64 [[X2]], i1 false), !range !0 +; CHECK-NEXT: [[Z1:%.*]] = add i64 [[Y1]], [[B1]] +; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]] +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } - - ; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 ; CHECK: declare <2 x double> 
@llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 ; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0 diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll index 0fe33f17a64..12f97ab77ba 100644 --- a/test/Transforms/BBVectorize/simple.ll +++ b/test/Transforms/BBVectorize/simple.ll @@ -1,199 +1,209 @@ -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + ; Basic depth-3 chain define double @test1(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test1( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = 
insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (last pair permuted) define double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test2( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y2, %B1 - %Z2 = fadd double %Y1, %B2 -; CHECK: %Z1.v.i1.1 = insertelement <2 x double> undef, double %B2, i32 0 -; CHECK: %Z1.v.i1.2 = insertelement <2 x double> %Z1.v.i1.1, double %B1, i32 1 -; CHECK: %Z2 = fadd <2 x double> %Y1, %Z1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z2.v.r1 = extractelement <2 x double> %Z2, i32 0 -; CHECK: %Z2.v.r2 = extractelement 
<2 x double> %Z2, i32 1 -; CHECK: %R = fmul double %Z2.v.r2, %Z2.v.r1 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0 +; CHECK-NEXT: [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1 +; CHECK-NEXT: [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]] +; CHECK-NEXT: [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0 +; CHECK-NEXT: [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y1, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (last pair first splat) define double @test3(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test3( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; 
CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y2, %B1 - %Z2 = fadd double %Y2, %B2 -; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 1> -; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> <i32 1, i32 1> +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (last pair second splat) define double @test4(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test4( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double>
undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y1, %B2 -; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y1, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain define <2 x 
float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) { ; CHECK-LABEL: @test5( -; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - %X1 = fsub <2 x float> %A1, %B1 - %X2 = fsub <2 x float> %A2, %B2 -; CHECK: %X1 = fsub <4 x float> %X1.v.i0, %X1.v.i1 - %Y1 = fmul <2 x float> %X1, %A1 - %Y2 = fmul <2 x float> %X2, %A2 -; CHECK: %Y1 = fmul <4 x float> %X1, %X1.v.i0 - %Z1 = fadd <2 x float> %Y1, %B1 - %Z2 = fadd <2 x float> %Y2, %B2 -; CHECK: %Z1 = fadd <4 x float> %Y1, %X1.v.i1 - %R = fmul <2 x float> %Z1, %Z2 -; CHECK: %Z1.v.r1 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 0, i32 1> -; CHECK: %Z1.v.r2 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 2, i32 3> -; CHECK: %R = fmul <2 x float> %Z1.v.r1, %Z1.v.r2 - ret <2 x float> %R -; CHECK: ret <2 x float> %R +; CHECK-NEXT: [[X1_V_I1:%.*]] = shufflevector <2 x float> [[B1:%.*]], <2 x float> [[B2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[X1_V_I0:%.*]] = shufflevector <2 x float> [[A1:%.*]], <2 x float> [[A2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[X1:%.*]] = fsub <4 x float> [[X1_V_I0]], [[X1_V_I1]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <4 x float> [[X1]], [[X1_V_I0]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <4 x float> [[Y1]], [[X1_V_I1]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[Z1_V_R2:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret <2 x float> [[R]] +; + %X1 = fsub <2 x float> %A1, %B1 + %X2 = fsub <2 x float> %A2, %B2 + %Y1 = fmul <2 x float> %X1, %A1 + %Y2 = fmul <2 x float> %X2, %A2 + %Z1 = fadd <2 x float> %Y1, %B1 + %Z2 = fadd <2 x float> %Y2, %B2 + %R = fmul <2 x float> %Z1, %Z2 + ret <2 x float> %R } ; Basic chain with shuffles define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { ;
CHECK-LABEL: @test6( -; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> -; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> - %X1 = sub <8 x i8> %A1, %B1 - %X2 = sub <8 x i8> %A2, %B2 -; CHECK: %X1 = sub <16 x i8> %X1.v.i0, %X1.v.i1 - %Y1 = mul <8 x i8> %X1, %A1 - %Y2 = mul <8 x i8> %X2, %A2 -; CHECK: %Y1 = mul <16 x i8> %X1, %X1.v.i0 - %Z1 = add <8 x i8> %Y1, %B1 - %Z2 = add <8 x i8> %Y2, %B2 -; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1 - %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> - %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> -; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> -; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> - %R = mul <8 x i8> %Q1, %Q2 -; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> -; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> -; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2 - ret <8 x i8> %R -; CHECK: ret <8 x i8> %R +; CHECK-NEXT: [[X1_V_I1:%.*]] = shufflevector <8 x i8> [[B1:%.*]], <8 x i8> [[B2:%.*]], <16 x i32> +; CHECK-NEXT: [[X1_V_I0:%.*]] = shufflevector <8 x i8> [[A1:%.*]], <8 x i8> [[A2:%.*]], <16 x i32> +; CHECK-NEXT: [[X1:%.*]] = sub <16 x i8> [[X1_V_I0]], [[X1_V_I1]] +; CHECK-NEXT: [[Y1:%.*]] = mul <16 x i8> [[X1]], [[X1_V_I0]] +; CHECK-NEXT: [[Z1:%.*]] = add <16 x i8> [[Y1]], [[X1_V_I1]] +; CHECK-NEXT: [[Q1_V_I1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[Q1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> [[Q1_V_I1]], <16 x i32> +; CHECK-NEXT: [[Q1_V_R1:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[Q1_V_R2:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = mul <8 x i8> [[Q1_V_R1]], [[Q1_V_R2]] +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %X1 = sub <8 x i8> %A1, %B1 + %X2 = sub <8 x i8> %A2, %B2 + %Y1 = mul <8 x i8> %X1, %A1 + %Y2 = mul 
<8 x i8> %X2, %A2 + %Z1 = add <8 x i8> %Y1, %B1 + %Z2 = add <8 x i8> %Y2, %B2 + %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> + %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> + %R = mul <8 x i8> %Q1, %Q2 + ret <8 x i8> %R } ; Basic depth-3 chain (flipped order) define double @test7(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test7( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z2 = fadd double %Y2, %B2 - %Z1 = fadd double %Y1, %B1 -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> 
[[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z2 = fadd double %Y2, %B2 + %Z1 = fadd double %Y1, %B1 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (subclass data) define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { ; CHECK-LABEL: @test8( -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 - %X1 = sub nsw i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = mul i64 %X1, %A1 - %Y2 = mul i64 %X2, %A2 -; CHECK: %Y1 = mul <2 x i64> %X1, %X1.v.i0.2 - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 - %R = mul i64 %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 - ret i64 %R -; CHECK: ret i64 %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = mul <2 x i64> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 
[[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub nsw i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = mul i64 %X1, %A1 + %Y2 = mul i64 %X2, %A2 + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll index 6edf7f07ac1..7dd538bdfb0 100644 --- a/test/Transforms/BBVectorize/simple3.ll +++ b/test/Transforms/BBVectorize/simple3.ll @@ -1,35 +1,38 @@ -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + ; Basic depth-3 chain define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) { ; CHECK-LABEL: @test1( -; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1 -; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2 -; CHECK: %X1.v.i0.13 = insertelement <3 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.24 = insertelement <3 x double> %X1.v.i0.13, double %A2, i32 1 -; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.24, double %A3, i32 2 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %X3 = fsub double %A3, %B3 -; CHECK: %X1 = fsub <3 x double> %X1.v.i0, %X1.v.i1 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 - %Y3 = fmul double %X3, %A3 -; CHECK: %Y1 = fmul <3 x double> %X1, %X1.v.i0 - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %Z3 = fadd double 
%Y3, %B3 -; CHECK: %Z1 = fadd <3 x double> %Y1, %X1.v.i1 - %R1 = fmul double %Z1, %Z2 - %R = fmul double %R1, %Z3 -; CHECK: %Z1.v.r210 = extractelement <3 x double> %Z1, i32 2 -; CHECK: %Z1.v.r1 = extractelement <3 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <3 x double> %Z1, i32 1 -; CHECK: %R1 = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: %R = fmul double %R1, %Z1.v.r210 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_11:%.*]] = insertelement <3 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_22:%.*]] = insertelement <3 x double> [[X1_V_I1_11]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I1:%.*]] = insertelement <3 x double> [[X1_V_I1_22]], double [[B3:%.*]], i32 2 +; CHECK-NEXT: [[X1_V_I0_13:%.*]] = insertelement <3 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_24:%.*]] = insertelement <3 x double> [[X1_V_I0_13]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0:%.*]] = insertelement <3 x double> [[X1_V_I0_24]], double [[A3:%.*]], i32 2 +; CHECK-NEXT: [[X1:%.*]] = fsub <3 x double> [[X1_V_I0]], [[X1_V_I1]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <3 x double> [[X1]], [[X1_V_I0]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <3 x double> [[Y1]], [[X1_V_I1]] +; CHECK-NEXT: [[Z1_V_R210:%.*]] = extractelement <3 x double> [[Z1]], i32 2 +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <3 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <3 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R1:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: [[R:%.*]] = fmul double [[R1]], [[Z1_V_R210]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %X3 = fsub double %A3, %B3 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Y3 = fmul double %X3, %A3 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %Z3 = fadd double %Y3, %B3 + %R1 = fmul double %Z1, %Z2 + %R = fmul double %R1, %Z3 + ret double %R } diff --git 
a/test/Transforms/BBVectorize/vector-sel.ll b/test/Transforms/BBVectorize/vector-sel.ll index cb775ceae69..bc15073b5a1 100644 --- a/test/Transforms/BBVectorize/vector-sel.ll +++ b/test/Transforms/BBVectorize/vector-sel.ll @@ -1,16 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -S | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @d = external global [1 x [10 x [1 x i16]]], align 16 -;CHECK-LABEL: @test -;CHECK: %0 = select i1 %bool, <4 x i16> , <4 x i16> -;CHECK: %1 = select i1 %bool, <4 x i16> , <4 x i16> -;CHECK: %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> -;CHECK: %3 = shufflevector <4 x i1> %boolvec, <4 x i1> %boolvec, <8 x i32> -;CHECK: %4 = select <8 x i1> %3, <8 x i16> , <8 x i16> %2 define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BOOL:%.*]] = icmp ne i32 undef, 0 +; CHECK-NEXT: [[BOOLVEC:%.*]] = icmp ne <4 x i32> undef, zeroinitializer +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[BOOL]], <4 x i16> , <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL]], <4 x i16> , <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[BOOLVEC]], <4 x i1> [[BOOLVEC]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> , <8 x i16> [[TMP2]] +; CHECK-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr ([1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0) to <8 x i16>*), align 2 +; CHECK-NEXT: ret void +; entry: %bool = icmp ne i32 undef, 0 %boolvec = icmp ne <4 x i32> undef, zeroinitializer