From: Simon Pilgrim
Date: Sat, 16 Dec 2017 23:32:18 +0000 (+0000)
Subject: [X86][AVX] lowerVectorShuffleAsBroadcast - aggressively peek through BITCASTs
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9974ce94da3e1207266b315586565a340a2b10b7;p=llvm

[X86][AVX] lowerVectorShuffleAsBroadcast - aggressively peek through BITCASTs

Assuming we can safely adjust the broadcast index for the new type to
keep it suitably aligned, peek through BITCASTs when looking for the
broadcast source.

Fixes PR32007

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320933 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b3dddb902d7..d48fc7b6417 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10346,9 +10346,16 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
   for (;;) {
     switch (V.getOpcode()) {
     case ISD::BITCAST: {
+      // Peek through bitcasts as long as BroadcastIdx can be adjusted.
       SDValue VSrc = V.getOperand(0);
-      MVT SrcVT = VSrc.getSimpleValueType();
-      if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+      unsigned NumEltBits = V.getScalarValueSizeInBits();
+      unsigned NumSrcBits = VSrc.getScalarValueSizeInBits();
+      if ((NumEltBits % NumSrcBits) == 0)
+        BroadcastIdx *= (NumEltBits / NumSrcBits);
+      else if ((NumSrcBits % NumEltBits) == 0 &&
+               (BroadcastIdx % (NumSrcBits / NumEltBits)) == 0)
+        BroadcastIdx /= (NumSrcBits / NumEltBits);
+      else
         break;
       V = VSrc;
       continue;
@@ -10380,6 +10387,23 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
     break;
   }
 
+  // Ensure the source vector and BroadcastIdx are for a suitable type.
+  if (VT.getScalarSizeInBits() != V.getScalarValueSizeInBits()) {
+    unsigned NumEltBits = VT.getScalarSizeInBits();
+    unsigned NumSrcBits = V.getScalarValueSizeInBits();
+    if ((NumSrcBits % NumEltBits) == 0)
+      BroadcastIdx *= (NumSrcBits / NumEltBits);
+    else if ((NumEltBits % NumSrcBits) == 0 &&
+             (BroadcastIdx % (NumEltBits / NumSrcBits)) == 0)
+      BroadcastIdx /= (NumEltBits / NumSrcBits);
+    else
+      return SDValue();
+
+    unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
+    MVT SrcVT = MVT::getVectorVT(VT.getScalarType(), NumSrcElts);
+    V = DAG.getBitcast(SrcVT, V);
+  }
+
   // Check if this is a broadcast of a scalar. We special case lowering
   // for scalars so that we can more effectively fold with loads.
   // First, look through bitcast: if the original value has a larger element
diff --git a/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll b/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll
index abc49d0ad88..c7291b02ae0 100644
--- a/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll
+++ b/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll
@@ -579,8 +579,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>*
 define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) {
 ; CHECK-LABEL: test_2xfloat_to_8xfloat_mem:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -589,10 +588,9 @@ define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) {
 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -604,10 +602,9 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <
 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -618,10 +615,9 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp,
 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -633,10 +629,9 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <
 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -647,10 +642,9 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp,
 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -662,10 +656,9 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <
 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -676,10 +669,9 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp,
 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %ymm3, %ymm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -691,10 +683,9 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <
 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %ymm2, %ymm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -705,8 +696,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp,
 define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) {
 ; CHECK-LABEL: test_2xfloat_to_16xfloat_mem:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -715,10 +705,9 @@ define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) {
 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -730,10 +719,9 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp,
 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -744,10 +732,9 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %v
 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -759,10 +746,9 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp,
 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -773,10 +759,9 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %v
 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -788,10 +773,9 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp,
 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -802,10 +786,9 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %v
 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vcmpeqps %zmm3, %zmm1, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -817,10 +800,9 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp,
 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpeqps %zmm2, %zmm0, %k1
-; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
 ; CHECK-NEXT:    retq
   %vec = load <2 x float>, <2 x float>* %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
diff --git a/test/CodeGen/X86/widened-broadcast.ll b/test/CodeGen/X86/widened-broadcast.ll
index 3de39f72022..96e97c70dbf 100644
--- a/test/CodeGen/X86/widened-broadcast.ll
+++ b/test/CodeGen/X86/widened-broadcast.ll
@@ -121,21 +121,10 @@ define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtabl
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: load_splat_8i32_4i32_01010101:
-; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: load_splat_8i32_4i32_01010101:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: load_splat_8i32_4i32_01010101:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: load_splat_8i32_4i32_01010101:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT:    retq
 entry:
   %ld = load <4 x i32>, <4 x i32>* %ptr
   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -257,21 +246,10 @@ define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounw
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT:    retq
 entry:
   %ld = load <8 x i16>, <8 x i16>* %ptr
   %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -468,21 +446,10 @@ define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT:    retq
 entry:
   %ld = load <16 x i8>, <16 x i8>* %ptr
   %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>