From 0401164ba787b3ae2f79c9031ba488466a2ef3fb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:13:27 +0000 Subject: [PATCH] GlobalISel: Partially implement lower for G_INSERT git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373946 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 41 +++++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +- .../AMDGPU/GlobalISel/legalize-insert.mir | 154 +++++++++++++++++- 4 files changed, 193 insertions(+), 13 deletions(-) diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 710a9f81d3b..bf60319996a 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -231,6 +231,7 @@ public: LegalizeResult lowerShuffleVector(MachineInstr &MI); LegalizeResult lowerDynStackAlloc(MachineInstr &MI); LegalizeResult lowerExtract(MachineInstr &MI); + LegalizeResult lowerInsert(MachineInstr &MI); private: MachineRegisterInfo &MRI; diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 861c22d605a..684b99d8bae 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2249,6 +2249,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerDynStackAlloc(MI); case G_EXTRACT: return lowerExtract(MI); + case G_INSERT: + return lowerInsert(MI); } } @@ -4134,3 +4136,42 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { return UnableToLegalize; } + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register InsertSrc = MI.getOperand(2).getReg(); + uint64_t Offset = MI.getOperand(3).getImm(); + + LLT DstTy = MRI.getType(Src); + LLT InsertTy = MRI.getType(InsertSrc); + + if (InsertTy.isScalar() && + (DstTy.isScalar() || + (DstTy.isVector() && DstTy.getElementType() == InsertTy))) { + LLT IntDstTy = DstTy; + if (!DstTy.isScalar()) { + IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0); + } + + Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); + if (Offset != 0) { + auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); + ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); + } + + APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset, + InsertTy.getSizeInBits()); + + auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); + auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); + auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + + MIRBuilder.buildBitcast(Dst, Or); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ce724e360ae..dfb8ed55d6b 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -912,13 +912,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1; // FIXME: Doesn't handle extract of illegal sizes. - auto &Builder = getActionDefinitionsBuilder(Op); - - // FIXME: Cleanup when G_INSERT lowering implemented. - if (Op == G_EXTRACT) - Builder.lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))); - - Builder + getActionDefinitionsBuilder(Op) + .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) + // FIXME: Multiples of 16 should not be legal. .legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; const LLT LitTy = Query.Types[LitTyIdx]; diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index 066932ec480..506dcfe0e8a 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -762,15 +762,48 @@ body: | ; CHECK-LABEL: name: test_insert_v2s16_s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0 - ; CHECK: $vgpr0 = COPY [[INSERT]](<2 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 %3:_(<2 x s16>) = G_INSERT %0, %2, 0 $vgpr0 = COPY %3 ... + +--- +name: test_insert_v2s16_s16_offset1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_v2s16_s16_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65535 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(<2 x s16>) = G_INSERT %0, %2, 1 + $vgpr0 = COPY %3 +... --- name: test_insert_v2s16_s16_offset16 body: | @@ -780,9 +813,17 @@ body: | ; CHECK-LABEL: name: test_insert_v2s16_s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16 - ; CHECK: $vgpr0 = COPY [[INSERT]](<2 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1247,3 +1288,104 @@ body: | %3:_(s64) = G_INSERT %0, %2, 48 $vgpr0_vgpr1 = COPY %3 ... +--- +name: test_insert_s32_s16_offset0 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 0 + $vgpr0 = COPY %3 +... + +--- +name: test_insert_s32_s16_offset1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65535 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 1 + $vgpr0 = COPY %3 +... + +--- +name: test_insert_s32_s16_offset8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65281 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 8 + $vgpr0 = COPY %3 +... + +--- +name: test_insert_s32_s16_offset16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 16 + $vgpr0 = COPY %3 +... -- 2.50.1