From 9f206fb20b8f7b145e35918b37c94b47ec5a5ad5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 7 Sep 2017 06:11:10 +0000 Subject: [PATCH] [X86] Force shuffle lowering to only create X86ISD::VPERM2X128 with 64-bit element types so we can remove some patterns from isel. Intrinsic handling is still creating these nodes with 32-bit elements as well. But at least this gets rid of 8 and 16. Ideally, someday we'll convert the intrinsics to generic vector shuffles and remove the intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312702 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 +++++-- lib/Target/X86/X86InstrSSE.td | 20 -------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 95c14ca9c1f..e9aa51f3921 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12124,8 +12124,11 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, // the value 2 selects the low half of source 2. We only use source 2 to // allow folding it into a memory operand. unsigned PERMMask = 3 | 2 << 4; - SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT), - V1, DAG.getConstant(PERMMask, DL, MVT::i8)); + MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; + SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, PVT, DAG.getUNDEF(VT), + DAG.getBitcast(PVT, V1), + DAG.getConstant(PERMMask, DL, MVT::i8)); + Flipped = DAG.getBitcast(VT, Flipped); return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9fa31243d82..493f7f90f94 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7683,10 +7683,6 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))), @@ -7694,12 +7690,6 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, - (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, - (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; } //===----------------------------------------------------------------------===// @@ -8110,17 +8100,7 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), let Predicates = [HasAVX2] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)), - (i8 imm:$imm))), - (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, - (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; -- 2.40.0