From c595a18bd8628f8bed2a77ccbad656d6f26c5fa9 Mon Sep 17 00:00:00 2001 From: Craig Topper <craig.topper@intel.com> Date: Sat, 30 Dec 2017 06:45:43 +0000 Subject: [PATCH] [X86] Custom legalize vXi1 extract_subvector with KSHIFTR. This allows us to remove some isel patterns. This is mostly NFC, but we now use KSHIFTB instead of KSHIFTW with DQI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 46 +++++++++++++++++-- lib/Target/X86/X86InstrAVX512.td | 43 ----------------- .../avx512-extract-subvector-load-store.ll | 22 ++++----- test/CodeGen/X86/avx512-skx-insert-subvec.ll | 4 +- test/CodeGen/X86/pr33349.ll | 2 +- 5 files changed, 57 insertions(+), 60 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8bc25c9f080..833305ae0bd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1186,9 +1186,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); - for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, - MVT::v16i1, MVT::v32i1, MVT::v64i1 }) - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); + for (auto VT : { MVT::v1i1, MVT::v8i1 }) + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal); @@ -1428,6 +1427,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); + for (auto VT : { MVT::v16i1, MVT::v32i1 }) + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Extends from v32i1 masks to 256-bit vectors. setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); @@ -1540,6 +1541,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); + for (auto VT : { MVT::v2i1, MVT::v4i1 }) + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Extends from v2i1/v4i1 masks to 128-bit vectors. setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); @@ -15070,6 +15073,42 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, return insert1BitVector(Op, DAG, Subtarget); } +static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 && + "Only vXi1 extract_subvectors need custom lowering"); + + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + + if (!isa<ConstantSDNode>(Idx)) + return SDValue(); + + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (IdxVal == 0) // the operation is legal + return Op; + + MVT VecVT = Vec.getSimpleValueType(); + unsigned NumElems = VecVT.getVectorNumElements(); + + // Extend to natively supported kshift. + MVT WideVecVT = VecVT; + if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { + WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, + DAG.getUNDEF(WideVecVT), Vec, + DAG.getIntPtrConstant(0, dl)); + } + + // Shift to the LSB. + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, + DAG.getIntPtrConstant(0, dl)); +} + // Returns the appropriate wrapper opcode for a global reference. unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const { // References to absolute symbols are never PC-relative. @@ -24595,6 +24634,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index dcd84930741..5b7b9baf003 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3087,49 +3087,6 @@ defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>; defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; - -multiclass vextract_for_mask_to_mask<string InstrStr, X86KVectorVTInfo From, - X86KVectorVTInfo To, Predicate prd> { -let Predicates = [prd] in - def : - Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))), - (To.KVT(COPY_TO_REGCLASS - (!cast<Instruction>(InstrStr#"ri") From.KVT:$src, - (i8 imm:$imm8)), To.KRC))>; -} - -multiclass vextract_for_mask_to_mask_legal_w<X86KVectorVTInfo From, - X86KVectorVTInfo To> { -def : - Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))), - (To.KVT(COPY_TO_REGCLASS - (KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16), - (i8 imm:$imm8)), To.KRC))>; -} - -defm : vextract_for_mask_to_mask_legal_w<v2i1_info, v1i1_info>; -defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v1i1_info>; -defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v1i1_info>; -defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v2i1_info>; -defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v2i1_info>; -defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v4i1_info>; - -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>; - // Patterns for kmask shift multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> { def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))), diff --git a/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/test/CodeGen/X86/avx512-extract-subvector-load-store.ll index e1ed8ea98a1..1f2e77dbc62 100644 --- a/test/CodeGen/X86/avx512-extract-subvector-load-store.ll +++ b/test/CodeGen/X86/avx512-extract-subvector-load-store.ll @@ -6,7 +6,7 @@ define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub ; AVX512-LABEL: load_v8i1_broadcast_4_v2i1: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $4, %k0, %k0 +; AVX512-NEXT: kshiftrb $4, %k0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %xmm2 ; AVX512-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512-NEXT: vpmovq2m %xmm2, %k1 @@ -37,7 +37,7 @@ define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub ; AVX512-LABEL: load_v8i1_broadcast_7_v2i1: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $6, %k0, %k0 +; AVX512-NEXT: kshiftrb $6, %k0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %xmm2 ; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] ; AVX512-NEXT: vpmovq2m %xmm2, %k1 @@ -624,7 +624,7 @@ define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v2i1_broadcast_1_v1i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $1, %k0, %k0 +; AVX512-NEXT: kshiftrb $1, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; @@ -645,7 +645,7 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $1, %k0, %k0 +; AVX512-NEXT: kshiftrb $1, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; @@ -666,7 +666,7 @@ define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $2, %k0, %k0 +; AVX512-NEXT: kshiftrb $2, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; @@ -687,7 +687,7 @@ define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v4i1_broadcast_2_v1i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $2, %k0, %k0 +; AVX512-NEXT: kshiftrb $2, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; @@ -708,7 +708,7 @@ define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v4i1_broadcast_3_v1i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $3, %k0, %k0 +; AVX512-NEXT: kshiftrb $3, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; @@ -729,7 +729,7 @@ define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v8i1_broadcast_4_v1i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $4, %k0, %k0 +; AVX512-NEXT: kshiftrb $4, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; @@ -750,7 +750,7 @@ define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) { ; AVX512-LABEL: load_v8i1_broadcast_4_v2i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $4, %k0, %k0 +; AVX512-NEXT: kshiftrb $4, %k0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 ; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0 ; AVX512-NEXT: vpmovq2m %xmm0, %k0 @@ -779,7 +779,7 @@ define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v8i1_broadcast_7_v1i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $7, %k0, %k0 +; AVX512-NEXT: kshiftrb $7, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; @@ -800,7 +800,7 @@ define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) { ; AVX512-LABEL: load_v8i1_broadcast_7_v2i1_store: ; AVX512: # %bb.0: ; AVX512-NEXT: kmovb (%rdi), %k0 -; AVX512-NEXT: kshiftrw $6, %k0, %k0 +; AVX512-NEXT: kshiftrb $6, %k0, %k0 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; AVX512-NEXT: vpmovq2m %xmm0, %k0 diff --git a/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/test/CodeGen/X86/avx512-skx-insert-subvec.ll index 6bee0de181a..f6cb093d521 100644 --- a/test/CodeGen/X86/avx512-skx-insert-subvec.ll +++ b/test/CodeGen/X86/avx512-skx-insert-subvec.ll @@ -136,7 +136,7 @@ define <4 x i1> @test9(<8 x i1> %a, <8 x i1> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 ; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: kshiftrw $4, %k0, %k0 +; CHECK-NEXT: kshiftrb $4, %k0, %k0 ; CHECK-NEXT: vpmovm2d %k0, %xmm0 ; CHECK-NEXT: retq %res = shufflevector <8 x i1> %a, <8 x i1> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -148,7 +148,7 @@ define <2 x i1> @test10(<4 x i1> %a, <4 x i1> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 -; CHECK-NEXT: kshiftrw $2, %k0, %k0 +; CHECK-NEXT: kshiftrb $2, %k0, %k0 ; CHECK-NEXT: vpmovm2q %k0, %xmm0 ; CHECK-NEXT: retq %res = shufflevector <4 x i1> %a, <4 x i1> %b, <2 x i32> <i32 2, i32 3> diff --git a/test/CodeGen/X86/pr33349.ll b/test/CodeGen/X86/pr33349.ll index b1428ba6667..8f9c861d9ec 100644 --- a/test/CodeGen/X86/pr33349.ll +++ b/test/CodeGen/X86/pr33349.ll @@ -40,7 +40,7 @@ target triple = "x86_64-unknown-linux-gnu" ; SKX: # %bb.0: # %bb ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 -; SKX-NEXT: kshiftrw $2, %k0, %k1 +; SKX-NEXT: kshiftrb $2, %k0, %k1 ; SKX-NEXT: kshiftrw $1, %k1, %k2 ; SKX-NEXT: kmovd %k2, %eax ; SKX-NEXT: testb $1, %al -- 2.40.0