From: Tim Northover Date: Mon, 26 Jun 2017 20:34:13 +0000 (+0000) Subject: AArch64: legalize G_EXTRACT operations. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=05321d30b5e207353eddc473431097b173a7fef0;p=llvm AArch64: legalize G_EXTRACT operations. This is the dual problem to legalizing G_INSERTs so most of the code and testing was cribbed from there. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306328 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1d0d3dffa4c..121823247e1 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -158,7 +158,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { // FIXME: Don't know how to handle secondary types yet. - if (TypeIdx != 0) + if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT) return UnableToLegalize; MIRBuilder.setInstr(MI); @@ -193,6 +193,58 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_EXTRACT: { + if (TypeIdx != 1) + return UnableToLegalize; + + int64_t NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = + MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize; + + SmallVector SrcRegs, DstRegs; + SmallVector Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + unsigned OpReg = MI.getOperand(0).getReg(); + int64_t OpStart = MI.getOperand(2).getImm(); + int64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned SrcStart = i * NarrowSize; + + if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { + // No part of the extract uses this subregister, ignore it. 
+ continue; + } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is extracted, forward the value. + DstRegs.push_back(SrcRegs[i]); + continue; + } + + // ExtractOffset is where the extracted bits start within this source part, + // and SegSize is how many of the requested bits this part supplies. + int64_t ExtractOffset, SegSize; + if (OpStart < SrcStart) { + ExtractOffset = 0; + SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); + } else { + ExtractOffset = OpStart - SrcStart; + SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); + } + + unsigned SegReg = SrcRegs[i]; + if (ExtractOffset != 0 || SegSize != NarrowSize) { + // A genuine extract is needed. + SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); + } + + DstRegs.push_back(SegReg); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_INSERT: { if (TypeIdx != 0) return UnableToLegalize; diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 595802f2228..ef1ce7f87eb 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -42,6 +42,7 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar; + DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar; DefaultActions[TargetOpcode::G_FNEG] = Lower; } @@ -75,8 +76,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { // FIXME: the long-term plan calls for expansion in terms of load/store (if // they're not legal). 
- if (Aspect.Opcode == TargetOpcode::G_EXTRACT || - Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || + if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) return std::make_pair(Legal, Aspect.Type); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 3c70013ea29..6fcc3907dd2 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -482,6 +482,9 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, "input operands do not cover output register"); #endif + if (Ops.size() == 1) + return buildCopy(Res, Ops[0]); + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES); MIB.addDef(Res); for (unsigned i = 0; i < Ops.size(); ++i) diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 01196817f31..25779774949 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -99,6 +99,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap). 
} + for (auto Ty : {s1, s8, s16, s32, s64, p0}) + setAction({G_EXTRACT, Ty}, Legal); + + for (auto Ty : {s32, s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + for (unsigned MemOp : {G_LOAD, G_STORE}) { for (auto Ty : {s8, s16, s32, s64, p0, v2s32}) setAction({MemOp, Ty}, Legal); diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index a584eabcc1b..53215296ce6 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -228,10 +228,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX() { for (auto Ty : {v8s32, v4s64}) setAction({MemOp, Ty}, Legal); - for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) { setAction({G_INSERT, Ty}, Legal); - for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) { setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoAVX2() { @@ -280,10 +284,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { for (auto Ty : {v16s32, v8s64}) setAction({MemOp, Ty}, Legal); - for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) + for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) { setAction({G_INSERT, Ty}, Legal); - for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) { setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } /************ VLX *******************/ if (!Subtarget.hasVLX()) diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir index e3e0175d39a..fbacc28d743 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir @@ -57,11 +57,11 @@ body: | %0:_(s64) = COPY %x0 ; CHECK-LABEL: name: test_combines_4 - ; 
CHECK: %2(<2 x s32>) = G_EXTRACT %1(s128), 0 - ; CHECK: %3(<2 x s32>) = G_ADD %2, %2 + ; CHECK: %2(s64) = COPY %0(s64) + ; CHECK: %3(s64) = G_ADD %2, %2 %1:_(s128) = G_MERGE_VALUES %0, %0 - %2:_(<2 x s32>) = G_EXTRACT %1, 0 - %3:_(<2 x s32>) = G_ADD %2, %2 + %2:_(s64) = G_EXTRACT %1, 0 + %3:_(s64) = G_ADD %2, %2 ... --- diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll index 23e7d5163e5..d2452b86170 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll +++ b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll @@ -22,12 +22,11 @@ declare void @_Unwind_Resume(i8*) ; CHECK: [[SEL:%[0-9]+]](s32) = G_PTRTOINT [[SEL_PTR]] ; CHECK: [[STRUCT_SEL:%[0-9]+]](s64) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 0 -; CHECK: [[STRUCT:%[0-9]+]](s128) = G_MERGE_VALUES [[STRUCT_PTR]](s64), [[STRUCT_SEL]] - -; CHECK: [[PTR:%[0-9]+]](p0) = G_EXTRACT [[STRUCT]](s128), 0 +; CHECK: [[PTR:%[0-9]+]](p0) = COPY [[STRUCT_PTR]](s64) ; CHECK: G_STORE [[PTR]](p0), {{%[0-9]+}}(p0) -; CHECK: [[SEL:%[0-9]+]](s32) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[SEL_TMP:%[0-9]+]](s32) = G_EXTRACT [[STRUCT_SEL]](s64), 0 +; CHECK: [[SEL:%[0-9]+]](s32) = COPY [[SEL_TMP]] ; CHECK: G_STORE [[SEL]](s32), {{%[0-9]+}}(p0) define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir new file mode 100644 index 00000000000..dc6b59b24a9 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir @@ -0,0 +1,85 @@ +# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- +name: test_extracts_1 +body: | + bb.0: + liveins: %w0 + + ; Low part of extraction takes the entirety of the low register, so the + ; value stored is forwarded directly from the first load. 
+ + ; CHECK-LABEL: name: test_extracts_1 + ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD + ; CHECK: {{%[0-9]+}}(s64) = G_LOAD + ; CHECK: [[VAL:%[0-9]+]](s64) = COPY [[LO]] + ; CHECK: G_STORE [[VAL]] + %0:_(s64) = COPY %x0 + %1:_(s32) = COPY %w1 + %2:_(p0) = COPY %x2 + %3:_(s128) = G_LOAD %2(p0) :: (load 16) + %4:_(s64) = G_EXTRACT %3(s128), 0 + G_STORE %4(s64), %2(p0) :: (store 8) + RET_ReallyLR +... + +--- +name: test_extracts_2 +body: | + bb.0: + liveins: %w0 + + ; Low extraction takes whole low register. High extraction is real. + ; CHECK-LABEL: name: test_extracts_2 + ; CHECK: [[LO_TMP:%[0-9]+]](s64) = G_LOAD + ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD + ; CHECK: [[LO:%[0-9]+]](s64) = COPY [[LO_TMP]] + ; CHECK: [[NEWHI_TMP:%[0-9]+]](s32) = G_EXTRACT [[HI]](s64), 0 + ; CHECK: [[NEWHI:%[0-9]+]](s32) = COPY [[NEWHI_TMP]] + ; CHECK: G_STORE [[LO]] + ; CHECK: G_STORE [[NEWHI]] + %0:_(s64) = COPY %x0 + %1:_(s32) = COPY %w1 + %2:_(p0) = COPY %x2 + %3:_(s128) = G_LOAD %2(p0) :: (load 16) + %4:_(s64) = G_EXTRACT %3(s128), 0 + %5:_(s32) = G_EXTRACT %3(s128), 64 + G_STORE %4(s64), %2(p0) :: (store 8) + G_STORE %5(s32), %2(p0) :: (store 4) + RET_ReallyLR +... + +--- +name: test_extracts_3 +body: | + bb.0: + liveins: %x0, %x1, %x2 + + + ; CHECK-LABEL: name: test_extracts_3 + ; CHECK: [[LO:%[0-9]+]](s32) = G_EXTRACT %0(s64), 32 + ; CHECK: [[HI:%[0-9]+]](s32) = G_EXTRACT %1(s64), 0 + ; CHECK: %3(s64) = G_MERGE_VALUES [[LO]](s32), [[HI]](s32) + %0:_(s64) = COPY %x0 + %1:_(s64) = COPY %x1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s64) = G_EXTRACT %2, 32 + RET_ReallyLR +... + +--- +name: test_extracts_4 +body: | + bb.0: + liveins: %x0, %x1, %x2 + + + ; CHECK-LABEL: name: test_extracts_4 + ; CHECK: [[LO_TMP:%[0-9]+]](s32) = G_EXTRACT %0(s64), 32 + ; CHECK: %3(s32) = COPY [[LO_TMP]] + %0:_(s64) = COPY %x0 + %1:_(s64) = COPY %x1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 32 + RET_ReallyLR +...