From: Craig Topper
Date: Tue, 17 Jan 2017 06:49:54 +0000 (+0000)
Subject: [AVX-512] Add test cases showing missed opportunities to fold subvector broadcasts...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a3d53858cdc9f7fd459943c9b6e15e25b6ff1775;p=llvm

[AVX-512] Add test cases showing missed opportunities to fold subvector broadcasts with a mask operation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292200 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/vector-shuffle-masked.ll b/test/CodeGen/X86/vector-shuffle-masked.ll
index e30ee2f4c71..c85db246fca 100644
--- a/test/CodeGen/X86/vector-shuffle-masked.ll
+++ b/test/CodeGen/X86/vector-shuffle-masked.ll
@@ -716,3 +716,185 @@ define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask)
   %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer
   ret <2 x double> %res
 }
+
+define <8 x float> @test_broadcast_2f64_8f32(<2 x double> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2f64_8f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <2 x double>, <2 x double> *%p
+  %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %3 = bitcast <4 x double> %2 to <8 x float>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> zeroinitializer
+  ret <8 x float> %res
+}
+
+define <8 x i32> @test_broadcast_2i64_8i32(<2 x i64> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2i64_8i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 = mem[0,1,0,1]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <2 x i64>, <2 x i64> *%p
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %3 = bitcast <4 x i64> %2 to <8 x i32>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> zeroinitializer
+  ret <8 x i32> %res
+}
+
+define <16 x float> @test_broadcast_2f64_16f32(<2 x double> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2f64_16f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <2 x double>, <2 x double> *%p
+  %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  %3 = bitcast <8 x double> %2 to <16 x float>
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
+  ret <16 x float> %res
+}
+
+define <16 x i32> @test_broadcast_2i64_16i32(<2 x i64> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2i64_16i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <2 x i64>, <2 x i64> *%p
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  %3 = bitcast <8 x i64> %2 to <16 x i32>
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
+  ret <16 x i32> %res
+}
+
+define <16 x float> @test_broadcast_4f64_16f32(<4 x double> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4f64_16f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <4 x double>, <4 x double> *%p
+  %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %3 = bitcast <8 x double> %2 to <16 x float>
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
+  ret <16 x float> %res
+}
+
+define <16 x i32> @test_broadcast_4i64_16i32(<4 x i64> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4i64_16i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <4 x i64>, <4 x i64> *%p
+  %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %3 = bitcast <8 x i64> %2 to <16 x i32>
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
+  ret <16 x i32> %res
+}
+
+define <4 x double> @test_broadcast_4f32_4f64(<4 x float> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4f32_4f64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <4 x float>, <4 x float> *%p
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %3 = bitcast <8 x float> %2 to <4 x double>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> zeroinitializer
+  ret <4 x double> %res
+}
+
+define <4 x i64> @test_broadcast_4i32_4i64(<4 x i32> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4i32_4i64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <4 x i32>, <4 x i32> *%p
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %3 = bitcast <8 x i32> %2 to <4 x i64>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> zeroinitializer
+  ret <4 x i64> %res
+}
+
+define <8 x double> @test_broadcast_4f32_8f64(<4 x float> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4f32_8f64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <4 x float>, <4 x float> *%p
+  %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %3 = bitcast <16 x float> %2 to <8 x double>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
+  ret <8 x double> %res
+}
+
+define <8 x i64> @test_broadcast_4i32_8i64(<4 x i32> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4i32_8i64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <4 x i32>, <4 x i32> *%p
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %3 = bitcast <16 x i32> %2 to <8 x i64>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
+  ret <8 x i64> %res
+}
+
+define <8 x double> @test_broadcast_8f32_8f64(<8 x float> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_8f32_8f64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <8 x float>, <8 x float> *%p
+  %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %3 = bitcast <16 x float> %2 to <8 x double>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
+  ret <8 x double> %res
+}
+
+define <8 x i64> @test_broadcast_8i32_8i64(<8 x i32> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_8i32_8i64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %1 = load <8 x i32>, <8 x i32> *%p
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %3 = bitcast <16 x i32> %2 to <8 x i64>
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
+  ret <8 x i64> %res
+}
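
The fold these tests are waiting for would merge the subvector broadcast and the
zero-masking move into a single masked broadcast. A minimal sketch for
test_broadcast_2f64_8f32, assuming the combine retypes the broadcast to the
f32x4 form so the element width matches the <8 x i1> mask (a 128-bit subvector
broadcast is element-type agnostic, so this preserves behavior); the output
below is illustrative only and is not produced by this commit:

    kmovb %esi, %k1                            # move the 8-bit mask into k1
    vbroadcastf32x4 (%rdi), %ymm0 {%k1} {z}    # 128-bit broadcast with zero-masking folded in

That is, the vbroadcastf64x2 + kmovb + vmovaps sequence checked above would
collapse to a kmovb plus one masked broadcast.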