return true;
}
+/// \returns true if a bitmask for \p Size bits will be an inline immediate.
+/// \p Mask is unconditionally set to the low-\p Size-bits mask, even when
+/// this returns false.
+static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
+ Mask = maskTrailingOnes<unsigned>(Size);
+ // Reinterpret as signed so a full 32-bit mask (0xffffffff == -1) also
+ // counts as an inline immediate.
+ int SignedMask = static_cast<int>(Mask);
+ // AMDGPU integer inline constants cover the range [-16, 64].
+ return SignedMask >= -16 && SignedMask <= 64;
+}
+
// NOTE(review): diff hunks — context lines are elided between hunks, so
// SrcBank, DstSize, SrcSize, DstReg, SrcReg and BFE32 are defined on lines
// not visible here.
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
const DebugLoc &DL = I.getDebugLoc();
if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
// 64-bit should have been split up in RegBankSelect
- //
- // TODO: USE V_AND_B32 when the constant mask is an inline immediate for
- // unsigned for smaller code size.
+
+ // Try to use an and with a mask if it will save code size.
+ unsigned Mask;
+ if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
+ // Unsigned extend with an inline-immediate mask: the VOP2 V_AND_B32_e32
+ // is smaller than the VOP3 V_BFE_U32 below. Immediate must be src0.
+ MachineInstr *ExtI =
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
+ .addImm(Mask)
+ .addReg(SrcReg)
+ return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ }
+
// Fallback: bitfield extract (signed or unsigned variant).
const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
MachineInstr *ExtI =
BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
}
- BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
- .addReg(SrcReg)
- .addImm(SrcSize << 16);
+ // Scalar path: prefer S_AND_B32 with an inline-immediate mask for unsigned
+ // extends; otherwise fall back to a scalar bitfield extract.
+ unsigned Mask;
+ if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
+ .addReg(SrcReg)
+ .addImm(Mask);
+ } else {
+ // S_BFE immediate packs offset in the low bits (0 here) and the field
+ // width shifted left by 16.
+ BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
+ .addReg(SrcReg)
+ .addImm(SrcSize << 16);
+ }
+
return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
}
---
+# anyext of s8 needs no masking or bitfield extract: it selects to a plain COPY.
+name: anyext_sgpr_s8_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GCN-LABEL: name: anyext_sgpr_s8_to_sgpr_s32
+ ; GCN: $sgpr0 = COPY %2:sreg_32_xm0
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s8) = G_TRUNC %0
+ %2:sgpr(s32) = G_ANYEXT %1
+ $sgpr0 = COPY %2
+
+...
+
+---
+
name: anyext_sgpr_s16_to_sgpr_s32
legalized: true
regBankSelected: true
---
+# VGPR anyext of s8: also selects to a plain COPY, no extend instruction.
+name: anyext_vgpr_s8_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: anyext_vgpr_s8_to_vgpr_s32
+ ; GCN: $vgpr0 = COPY %2:vgpr_32
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s8) = G_TRUNC %0
+ %2:vgpr(s32) = G_ANYEXT %1
+ $vgpr0 = COPY %2
+
+...
+
+---
+
name: anyext_vgpr_s16_to_vgpr_s32
legalized: true
regBankSelected: true
---
+# Signed extend of s8 on SGPRs selects the dedicated S_SEXT_I32_I8 instruction.
+name: sext_sgpr_s8_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GCN-LABEL: name: sext_sgpr_s8_to_sgpr_s32
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+ ; GCN: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32_xm0 = S_SEXT_I32_I8 [[COPY]]
+ ; GCN: $sgpr0 = COPY [[S_SEXT_I32_I8_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s8) = G_TRUNC %0
+ %2:sgpr(s32) = G_SEXT %1
+ $sgpr0 = COPY %2
+...
+
+---
+
name: sext_sgpr_s16_to_sgpr_s32
legalized: true
regBankSelected: true
---
+# Signed extend of s8 on VGPRs uses V_BFE_I32 (offset 0, width 8); the AND
+# shortcut only applies to unsigned extends.
+name: sext_vgpr_s8_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: sext_vgpr_s8_to_vgpr_s32
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 8, implicit $exec
+ ; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s8) = G_TRUNC %0
+ %2:vgpr(s32) = G_SEXT %1
+ $vgpr0 = COPY %2
+
+...
+
+---
+
name: sext_vgpr_s16_to_vgpr_s32
legalized: true
regBankSelected: true
; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s32
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
- ; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32_xm0 = S_BFE_U32 [[COPY]], 65536, implicit-def $scc
- ; GCN: $sgpr0 = COPY [[S_BFE_U32_]]
+ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], 1, implicit-def $scc
+ ; GCN: $sgpr0 = COPY [[S_AND_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s1) = G_TRUNC %0
%2:sgpr(s32) = G_ZEXT %1
---
+# Zero extend of s8: the mask 0xff is not an inline immediate (> 64), so this
+# still selects S_BFE_U32; 524288 == 8 << 16 encodes width 8, offset 0.
+name: zext_sgpr_s8_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GCN-LABEL: name: zext_sgpr_s8_to_sgpr_s32
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+ ; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32_xm0 = S_BFE_U32 [[COPY]], 524288, implicit-def $scc
+ ; GCN: $sgpr0 = COPY [[S_BFE_U32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s8) = G_TRUNC %0
+ %2:sgpr(s32) = G_ZEXT %1
+ $sgpr0 = COPY %2
+
+...
+
+---
+
name: zext_sgpr_s16_to_sgpr_s32
legalized: true
regBankSelected: true
; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s32
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[COPY]], 0, 1, implicit $exec
- ; GCN: $vgpr0 = COPY [[V_BFE_U32_]]
+ ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
+ ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s1) = G_TRUNC %0
%2:vgpr(s32) = G_ZEXT %1
---
+# VGPR zero extend of s8: mask 0xff is not an inline immediate, so V_BFE_U32
+# (offset 0, width 8) is selected rather than V_AND_B32.
+name: zext_vgpr_s8_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: zext_vgpr_s8_to_vgpr_s32
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[COPY]], 0, 8, implicit $exec
+ ; GCN: $vgpr0 = COPY [[V_BFE_U32_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s8) = G_TRUNC %0
+ %2:vgpr(s32) = G_ZEXT %1
+ $vgpr0 = COPY %2
+
+...
+
+---
+
name: zext_vgpr_s16_to_vgpr_s32
legalized: true
regBankSelected: true