From b1b624d08a35e0efbead95a4072f613b788b9cbb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 18 Feb 2019 22:39:27 +0000 Subject: [PATCH] GlobalISel: Implement widenScalar for g_extract scalar results git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354293 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 9 + include/llvm/IR/DataLayout.h | 9 +- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 54 +++- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 5 +- .../AMDGPU/GlobalISel/legalize-extract.mir | 293 ++++++++++++++++++ 5 files changed, 359 insertions(+), 11 deletions(-) diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 679e3a0c3d2..5ad757769cc 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -237,6 +237,10 @@ public: return *State.MF; } + const DataLayout &getDataLayout() const { + return getMF().getFunction().getParent()->getDataLayout(); + } + /// Getter for DebugLoc const DebugLoc &getDL() { return State.DL; } @@ -465,6 +469,11 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op); + /// Build and insert a G_PTRTOINT instruction. + MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src}); + } + /// \return The opcode of the extension the target wants to use for boolean /// values. unsigned getBoolExtOp(bool IsVec, bool IsFP) const; diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index 274ec2351d8..869b27b1fda 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -345,10 +345,13 @@ public: return NonIntegralAddressSpaces; } - bool isNonIntegralPointerType(PointerType *PT) const { + bool isNonIntegralAddressSpace(unsigned AddrSpace) const { ArrayRef NonIntegralSpaces = getNonIntegralAddressSpaces(); - return find(NonIntegralSpaces, PT->getAddressSpace()) != - NonIntegralSpaces.end(); + return find(NonIntegralSpaces, AddrSpace) != NonIntegralSpaces.end(); + } + + bool isNonIntegralPointerType(PointerType *PT) const { + return isNonIntegralAddressSpace(PT->getAddressSpace()); } bool isNonIntegralPointerType(Type *Ty) const { diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e8b5b8cd3b5..38f0e221431 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -846,20 +846,62 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx != 1) - return UnableToLegalize; - + unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(SrcReg); + + LLT DstTy = MRI.getType(DstReg); + unsigned Offset = MI.getOperand(2).getImm(); + + if (TypeIdx == 0) { + if (SrcTy.isVector() || DstTy.isVector()) + return UnableToLegalize; + + SrcOp Src(SrcReg); + if (SrcTy.isPointer()) { + // Extracts from pointers can be handled only if they are really just + // simple integers. + const DataLayout &DL = MIRBuilder.getDataLayout(); + if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) + return UnableToLegalize; + + LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); + SrcTy = SrcAsIntTy; + } + + if (DstTy.isPointer()) + return UnableToLegalize; + + if (Offset == 0) { + // Avoid a shift in the degenerate case. + MIRBuilder.buildTrunc(DstReg, + MIRBuilder.buildAnyExtOrTrunc(WideTy, Src)); + MI.eraseFromParent(); + return Legalized; + } + + // Do a shift in the source type. + LLT ShiftTy = SrcTy; + if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { + Src = MIRBuilder.buildAnyExt(WideTy, Src); + ShiftTy = WideTy; + } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) + return UnableToLegalize; + + auto LShr = MIRBuilder.buildLShr( + ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); + MIRBuilder.buildTrunc(DstReg, LShr); + MI.eraseFromParent(); + return Legalized; + } + if (!SrcTy.isVector()) return UnableToLegalize; - unsigned DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); if (DstTy != SrcTy.getElementType()) return UnableToLegalize; - unsigned Offset = MI.getOperand(2).getImm(); if (Offset % SrcTy.getScalarSizeInBits() != 0) return UnableToLegalize; diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4487bffacdc..58fd948ccc5 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -516,9 +516,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .widenScalarIf( [=](const LegalityQuery &Query) { const LLT Ty1 = Query.Types[1]; - return (Ty1.getScalarSizeInBits() < 16); + return Ty1.isVector() && Ty1.getScalarSizeInBits() < 16; }, - LegalizeMutations::widenScalarOrEltToNextPow2(1, 16)); + LegalizeMutations::widenScalarOrEltToNextPow2(1, 16)) + .clampScalar(0, S16, S256); // TODO: vectors of pointers getActionDefinitionsBuilder(G_BUILD_VECTOR) diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index c049231a3a4..58ed38d616d 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -523,3 +523,296 @@ body: | %1:_(<2 x s16>) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 ... + +--- +name: test_extract_s8_s16_offset0 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s16_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s8) = G_EXTRACT %1, 0 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_extract_s8_s16_offset1 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s16_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s8) = G_EXTRACT %1, 1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_extract_s8_s16_offset8 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s16_offset8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s8) = G_EXTRACT %1, 8 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_extract_s8_s32_offset0 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s32_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_EXTRACT %0, 0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s8_s32_offset1 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s32_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_EXTRACT %0, 1 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s8_s32_offset8 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s32_offset8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_EXTRACT %0, 8 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s8_s32_offset16 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s32_offset16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s8_s32_offset24 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_s32_offset24 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s8_p3_offset0 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_p3_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s8) = G_EXTRACT %0, 0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s8_p3_offset8 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s8_p3_offset8 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s8) = G_EXTRACT %0, 8 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s1_s8_offset0 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s1_s8_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s1) = G_EXTRACT %1, 0 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_extract_s1_s8_offset2 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_extract_s1_s8_offset2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s1) = G_EXTRACT %1, 2 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_extract_s8_s64_offset2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_extract_s8_s64_offset2 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s8) = G_EXTRACT %0, 2 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s8_s64_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_extract_s8_s64_offset16 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s8) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s16_s64_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_extract_s16_s64_offset16 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 16 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s16_s64_offset32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_extract_s16_s64_offset32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_EXTRACT %0, 32 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: test_extract_s16_s64_offset48 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_extract_s16_s64_offset48 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 48 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_EXTRACT %0, 48 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... -- 2.40.0