From 230bc3fabf9fdd02de50ca2db93e2461ae35ea38 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 27 Jan 2019 00:52:51 +0000 Subject: [PATCH] GlobalISel: Implement narrowScalar for mul git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352300 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 + .../CodeGen/GlobalISel/MachineIRBuilder.h | 18 +++++++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 47 +++++++++++++++++++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1 + .../AMDGPU/GlobalISel/legalize-mul.mir | 26 ++++++++++ 5 files changed, 94 insertions(+) diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index e28a4962063..93841286869 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -125,6 +125,8 @@ private: LegalizeResult fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); + LegalizeResult narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty); MachineRegisterInfo &MRI; diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index e5542964a08..b75040e8320 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1123,6 +1123,24 @@ public: return buildInstr(TargetOpcode::G_MUL, {Dst}, {Src0, Src1}, Flags); } + MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_UMULH, {Dst}, {Src0, Src1}, Flags); + } + + MachineInstrBuilder buildSMulH(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags); + } + + MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, 
+ const SrcOp &Src1, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_SHL, {Dst}, {Src0, Src1}, Flags); + } + /// Build and insert \p Res = G_AND \p Op0, \p Op1 /// /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index cf997a8926c..2ab35645064 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -343,6 +343,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_MUL: + return narrowScalarMul(MI, TypeIdx, NarrowTy); case TargetOpcode::G_EXTRACT: { if (TypeIdx != 1) return UnableToLegalize; @@ -1526,6 +1528,51 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, } } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Src0 = MI.getOperand(1).getReg(); + unsigned Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(DstReg); + if (Ty.isVector()) + return UnableToLegalize; + + unsigned Size = Ty.getSizeInBits(); + unsigned NewSize = Size / 2; + if (Size != 2 * NewSize) + return UnableToLegalize; + + LLT HalfTy = LLT::scalar(NewSize); + // TODO: if HalfTy != NewTy, handle the breakdown all at once? 
+ + unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); + unsigned Lo = MRI.createGenericVirtualRegister(HalfTy); + unsigned Hi = MRI.createGenericVirtualRegister(HalfTy); + unsigned ExtLo = MRI.createGenericVirtualRegister(Ty); + unsigned ExtHi = MRI.createGenericVirtualRegister(Ty); + unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty); + + SmallVector<unsigned, 2> Src0Parts; + SmallVector<unsigned, 2> Src1Parts; + + extractParts(Src0, HalfTy, 2, Src0Parts); + extractParts(Src1, HalfTy, 2, Src1Parts); + + MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]); + + // TODO: Use smulh or umulh depending on what the target has. + MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]); + + MIRBuilder.buildConstant(ShiftAmt, NewSize); + MIRBuilder.buildAnyExt(ExtHi, Hi); + MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt); + + MIRBuilder.buildZExt(ExtLo, Lo); + MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { unsigned Opc = MI.getOpcode(); diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ecbfada17ba..9e6fa061c10 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -95,6 +95,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_UMULH, G_SMULH}) .legalFor({S32}) + .clampScalar(0, S32, S32) .scalarize(0); // Report legal for any types we can handle anywhere. For the cases only legal diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index 5e55a6b5482..e7b79490b77 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -38,3 +38,29 @@ body: | %2:_(<2 x s32>) = G_MUL %0, %1 $vgpr0_vgpr1 = COPY %2 ... 
+ +--- +name: test_mul_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_mul_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; CHECK: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UMULH]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[TRUNC]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[MUL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_MUL %0, %1 + $vgpr0_vgpr1 = COPY %2 +... -- 2.50.1