From: Ahmed Bougacha Date: Thu, 11 Dec 2014 23:07:52 +0000 (+0000) Subject: [X86] Add a temporary testcase for PR21876/r223996. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=11fcb483069abef41d89091a33cc55d747424560;p=llvm [X86] Add a temporary testcase for PR21876/r223996. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224074 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a4af16a14b4..a9ca166c1c7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6178,6 +6178,7 @@ multiclass SS41I_pmovx_avx2_patterns { (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>; // On AVX2, we also support 256bit inputs. + // FIXME: remove these patterns when the old shuffle lowering goes away. def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))), (!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>; def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))), diff --git a/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll b/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll new file mode 100644 index 00000000000..44eb42adb9f --- /dev/null +++ b/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -x86-experimental-vector-shuffle-lowering=false -mattr=+avx2 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin" + +; PR21876 +; The old shuffle lowering sometimes generates VZEXT nodes with both input +; and output same-sized types, here 256-bits. For instance, a v8i8 to v8i32 +; zero-extend would become a (v8i32 (VZEXT v32i8)) node, which can't happen +; otherwise. The companion commit r223996 added those patterns temporarily. +; This test, along with the VR256 for AVX2 PMOVXrr instructions, should be +; removed once the old vector shuffle lowering goes away. 
+ +define void @test_avx2_pmovx_256(<8 x i8>* %tmp64, <8 x float>* %tmp75) { +; CHECK-LABEL: test_avx2_pmovx_256 +; We really don't care about the generated code. +; CHECK: vpmovzxbd +; CHECK: vpbroadcastd +; CHECK: vpand +; CHECK: vcvtdq2ps +; CHECK: vmovups +; CHECK: vzeroupper +; CHECK: retq + + %wide.load458 = load <8 x i8>* %tmp64, align 1 + %tmp68 = uitofp <8 x i8> %wide.load458 to <8 x float> + store <8 x float> %tmp68, <8 x float>* %tmp75, align 4 + ret void +}