From: Matt Arsenault Date: Sat, 2 Feb 2019 23:56:00 +0000 (+0000) Subject: GlobalISel: Implement widenScalar for G_EXTRACT vector sources X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=757bd101c78fff13b00870638a64c3800a237282;p=llvm GlobalISel: Implement widenScalar for G_EXTRACT vector sources Handle the basic element extract case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352978 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index bd0ad3f8083..f1f7c98eb44 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -942,6 +942,32 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_EXTRACT: { + if (TypeIdx != 1) + return UnableToLegalize; + + unsigned SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (!SrcTy.isVector()) + return UnableToLegalize; + + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy != SrcTy.getElementType()) + return UnableToLegalize; + + unsigned Offset = MI.getOperand(2).getImm(); + if (Offset % SrcTy.getScalarSizeInBits() != 0) + return UnableToLegalize; + + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + + MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * + Offset); + widenScalarDst(MI, WideTy.getScalarType(), 0); + + return Legalized; + } case TargetOpcode::G_MERGE_VALUES: { if (TypeIdx != 1) return UnableToLegalize; diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1ce52e9a1a0..509d651eccf 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -439,6 +439,24 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, const LLT &Ty1 = 
Query.Types[1]; return (Ty0.getSizeInBits() % 16 == 0) && (Ty1.getSizeInBits() % 16 == 0); + }) + .widenScalarIf( + [=](const LegalityQuery &Query) { + const LLT &Ty0 = Query.Types[0]; + const LLT &Ty1 = Query.Types[1]; + return (Ty1.getScalarSizeInBits() < 16); + }, + // TODO Use generic LegalizeMutation + [](const LegalityQuery &Query) { + LLT Ty1 = Query.Types[1]; + unsigned NewEltSizeInBits = + std::max(1 << Log2_32_Ceil(Ty1.getScalarSizeInBits()), 16); + if (Ty1.isVector()) { + return std::make_pair(1, LLT::vector(Ty1.getNumElements(), + NewEltSizeInBits)); + } + + return std::make_pair(1, LLT::scalar(NewEltSizeInBits)); }); // TODO: vectors of pointers diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index 3aa27eeff6c..75bbcaa6bba 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -103,3 +103,135 @@ body: | %1:_(s32) = G_EXTRACT %0, 64 S_ENDPGM implicit %1 ... + +--- +name: extract_s8_v4s8_offset0 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s8_v4s8_offset0 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<4 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_EXTRACT %0, 0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s8_v4s8_offset8 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s8_v4s8_offset8 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 16 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<4 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_EXTRACT %0, 8 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: extract_s8_v4s8_offset16 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s8_v4s8_offset16 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<4 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s8_v4s8_offset24 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s8_v4s8_offset24 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 48 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<4 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_EXTRACT %0, 24 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s8_v3s8_offset16 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s8_v3s8_offset16 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<3 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<3 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s8_v5s1_offset4 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s8_v5s1_offset4 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<5 x s16>), 64 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<5 x s1>) = G_IMPLICIT_DEF + %1:_(s1) = G_EXTRACT %0, 4 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: extract_v2s16_v4s16_offset32 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + %0:_(<4 x s16>) = G_IMPLICIT_DEF + %1:_(<2 x s16>) = G_EXTRACT %0, 32 + $vgpr0 = COPY %1 +... + +--- +name: extract_v2s16_v6s16_offset32 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + %0:_(<6 x s16>) = G_IMPLICIT_DEF + %1:_(<2 x s16>) = G_EXTRACT %0, 32 + $vgpr0 = COPY %1 +...