[x86] add tests for unpack shuffle lowering; NFC

author Sanjay Patel <spatel@rotateright.com>

Thu, 24 Jan 2019 14:12:34 +0000 (14:12 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Thu, 24 Jan 2019 14:12:34 +0000 (14:12 +0000)
author Sanjay Patel <spatel@rotateright.com>
Thu, 24 Jan 2019 14:12:34 +0000 (14:12 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Thu, 24 Jan 2019 14:12:34 +0000 (14:12 +0000)
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll

index 461246d80a824d9b2db2d9886564e27b7292fe9f..4e447d9429d85120ca30ba18aadc3e6d75f77cce 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -4605,3 +4605,20 @@ define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
    %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    ret <16 x i16> %tmp3
  }
+
+define <16 x i16> @unpckh_v16i16(<16 x i16> %x, <16 x i16> %y) {
+; AVX1-LABEL: unpckh_v16i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2OR512VL-LABEL: unpckh_v16i16:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm1, %xmm1
+; AVX2OR512VL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT:    retq
+  %unpckh = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 4, i32 28, i32 5, i32 29, i32 6, i32 30, i32 7, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %unpckh
+}
+
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll

index ba0e2086f4888d8d07e176b9a2d592290180cdc8..d268364fda86232fc72dd373eec352777d7b9575 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -3321,3 +3321,20 @@ define <32 x i8> @zeroable_src_to_zext(<32 x i8> %a0) {
    %2 = shufflevector <32 x i8> %1, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 20, i32 21, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
    ret <32 x i8> %2
  }
+
+define <32 x i8> @unpckh_v32i8(<32 x i8> %x, <32 x i8> %y) {
+; AVX1-LABEL: unpckh_v32i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    retq
+;
+; AVX2OR512VL-LABEL: unpckh_v32i8:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm1, %xmm1
+; AVX2OR512VL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX2OR512VL-NEXT:    retq
+  %unpckh = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 8, i32 56, i32 9, i32 57, i32 10, i32 58, i32 11, i32 59, i32 12, i32 60, i32 13, i32 61, i32 14, i32 62, i32 15, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <32 x i8> %unpckh
+}
+
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll

index b3750b74ad34019cf5828f605735bc89cd96d097..83316d0a80cb79ced6dd0416508150be6aa5d8df 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1903,3 +1903,64 @@ define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
    %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    ret <8 x i32> %b
  }
+
+; FIXME: AVX1 lowering is better than AVX2 (and AVX512?)
+
+define <4 x i64> @unpckh_v4i64(<4 x i64> %x, <4 x i64> %y) {
+; AVX1-LABEL: unpckh_v4i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: unpckh_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
+; AVX2-NEXT:    retq
+;
+; AVX512VL-SLOW-LABEL: unpckh_v4i64:
+; AVX512VL-SLOW:       # %bb.0:
+; AVX512VL-SLOW-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
+; AVX512VL-SLOW-NEXT:    retq
+;
+; AVX512VL-FAST-LABEL: unpckh_v4i64:
+; AVX512VL-FAST:       # %bb.0:
+; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,7,3,7]
+; AVX512VL-FAST-NEXT:    vpermt2q %ymm1, %ymm2, %ymm0
+; AVX512VL-FAST-NEXT:    retq
+  %unpckh = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 1, i32 7, i32 undef, i32 undef>
+  ret <4 x i64> %unpckh
+}
+
+; FIXME: AVX1 lowering is better than AVX2 (and AVX512?)
+
+define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
+; AVX1-LABEL: unpckh_v4f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: unpckh_v4f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
+; AVX2-NEXT:    retq
+;
+; AVX512VL-SLOW-LABEL: unpckh_v4f64:
+; AVX512VL-SLOW:       # %bb.0:
+; AVX512VL-SLOW-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
+; AVX512VL-SLOW-NEXT:    retq
+;
+; AVX512VL-FAST-LABEL: unpckh_v4f64:
+; AVX512VL-FAST:       # %bb.0:
+; AVX512VL-FAST-NEXT:    vmovapd {{.*#+}} ymm2 = [1,7,3,7]
+; AVX512VL-FAST-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
+; AVX512VL-FAST-NEXT:    retq
+  %unpckh = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 7, i32 undef, i32 undef>
+  ret <4 x double> %unpckh
+}
+
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll

index addf2d2563fc59bd593e7b771de3d642840dda88..b2187397bb15d45f0d57ab3de6eb7b01ce642a3e 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -2877,3 +2877,108 @@ entry:
    %tmp6 = insertelement <8 x float> %tmp5, float %z, i32 5
    ret <8 x float> %tmp6
  }
+
+; FIXME: AVX1 lowering is better than AVX2 (and AVX512?)
+; PR40434: https://bugs.llvm.org/show_bug.cgi?id=40434
+
+define <8 x i32> @unpckh_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; AVX1-LABEL: unpckh_v8i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: unpckh_v8i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = <2,6,3,7,u,u,u,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: unpckh_v8i32:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <2,14,3,15,u,u,u,u>
+; AVX512VL-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT:    retq
+  %unpckh = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i32> %unpckh
+}
+
+; FIXME: Same as above but with floats. AVX1 lowering is better than AVX2 (and AVX512?)
+
+define <8 x float> @unpckh_v8f32(<8 x float> %x, <8 x float> %y) {
+; AVX1-LABEL: unpckh_v8f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: unpckh_v8f32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = <2,6,3,7,u,u,u,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: unpckh_v8f32:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = <2,14,3,15,u,u,u,u>
+; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT:    retq
+  %unpckh = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %unpckh
+}
+
+; FIXME: Why are integer and FP (below) lowering different for AVX1?
+
+define <8 x i32> @lowhalf_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; AVX1-LABEL: lowhalf_v8i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,2]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: lowhalf_v8i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = <2,6,3,6,u,u,u,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: lowhalf_v8i32:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <2,14,3,14,u,u,u,u>
+; AVX512VL-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT:    retq
+  %r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i32> %r
+}
+
+; FIXME: AVX1 lowering is better than AVX2 (and AVX512?)
+
+define <8 x float> @lowhalf_v8f32(<8 x float> %x, <8 x float> %y) {
+; AVX1-LABEL: lowhalf_v8f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[2,2]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: lowhalf_v8f32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = <2,6,3,6,u,u,u,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: lowhalf_v8f32:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = <2,14,3,14,u,u,u,u>
+; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT:    retq
+  %r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %r
+}
+
author	Sanjay Patel <spatel@rotateright.com>
	Thu, 24 Jan 2019 14:12:34 +0000 (14:12 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Thu, 24 Jan 2019 14:12:34 +0000 (14:12 +0000)
test/CodeGen/X86/vector-shuffle-256-v16.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v32.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v4.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v8.ll		patch \| blob \| history