case AMDGPU::G_SEXT:
case AMDGPU::G_ZEXT: {
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy != LLT::scalar(1))
- return;
+ bool Signed = Opc == AMDGPU::G_SEXT;
MachineIRBuilder B(MI);
- bool Signed = Opc == AMDGPU::G_SEXT;
+ const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
- if (SrcBank->getID() == AMDGPU::SCCRegBankID ||
- SrcBank->getID() == AMDGPU::VCCRegBankID) {
- const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
- unsigned DstSize = DstTy.getSizeInBits();
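+  // There is no 64-bit extend for VGPR values, so build the two 32-bit
+  // halves of the result individually.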
+ if (DstTy.isScalar() &&
+ SrcBank != &AMDGPU::SGPRRegBank &&
+ SrcBank != &AMDGPU::SCCRegBank &&
+ SrcBank != &AMDGPU::VCCRegBank &&
+      // FIXME: Should handle any type that rounds to s64 when irregular
+      // breakdowns are supported.
+ DstTy.getSizeInBits() == 64 &&
+ SrcTy.getSizeInBits() <= 32) {
+ const LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
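+    // DefRegs holds the two 32-bit halves created for the split 64-bit def.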
+
+    // Extend the source into the low 32-bit half, then extend that half to
+    // produce the high half.
+ if (Signed) {
+ // TODO: Should really be buildSExtOrCopy
+ B.buildSExtOrTrunc(DefRegs[0], SrcReg);
+
+ // Replicate sign bit from 32-bit extended part.
+ auto ShiftAmt = B.buildConstant(S32, 31);
+ MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+ B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
+ } else {
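+      // The high half of a zero-extended value is a constant zero.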
+ B.buildZExtOrTrunc(DefRegs[0], SrcReg);
+ B.buildConstant(DefRegs[1], 0);
+ }
+ MRI.setRegBank(DstReg, *SrcBank);
+ MI.eraseFromParent();
+ return;
+ }
+
+ if (SrcTy != LLT::scalar(1))
+ return;
+
+ if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
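+    // An SCC source selects into SGPRs; a VCC source selects into VGPRs.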
+ const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
+ &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
+
+ unsigned DstSize = DstTy.getSizeInBits();
// 64-bit select is SGPR only
const bool UseSel64 = DstSize > 32 &&
SrcBank->getID() == AMDGPU::SCCRegBankID;
MRI.setRegBank(True.getReg(0), *DstBank);
MRI.setRegBank(False.getReg(0), *DstBank);
+ MRI.setRegBank(DstReg, *DstBank);
+
if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
- auto Sel = B.buildSelect(SelType, SrcReg, True, False);
- MRI.setRegBank(Sel.getReg(0), *DstBank);
- B.buildMerge(DstReg, { Sel.getReg(0), Sel.getReg(0) });
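+        // Reuse the low-half select result for the high half.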
+ B.buildSelect(DefRegs[0], SrcReg, True, False);
+ B.buildCopy(DefRegs[1], DefRegs[0]);
} else if (DstSize < 32) {
auto Sel = B.buildSelect(SelType, SrcReg, True, False);
MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
}
- OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+    // TODO: Should anyext be split into 32-bit parts as well?
+ if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
+ OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+ } else {
+      // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+      // 32 bits, and then to 64.
+ OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+ SrcSize);
+ }
break;
}
case AMDGPU::G_FCMP: {
%1:_(s64) = G_SEXT %0
...
+---
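+# The SGPR extend is left whole; scalar code can use a single 64-bit BFE.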
+name: sext_s16_to_s64_s
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; CHECK-LABEL: name: sext_s16_to_s64_s
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s16)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_SEXT %1
+...
+
---
name: sext_s32_to_s64_v
legalized: true
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: sext_s32_to_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s64) = G_SEXT [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+ ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s64) = G_SEXT %0
...
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
; CHECK-LABEL: name: sext_s1_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+ ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_SEXT %1
...
+
+---
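+# The VGPR extend is split: sign-extend to 32 bits, then replicate the sign
+# bit into the high half with a shift.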
+name: sext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: sext_s16_to_s64_vgpr
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s16)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+ ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_SEXT %1
+...
%1:_(s64) = G_ZEXT %0
...
+---
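+# As with sext, the scalar zext is left as a single wide extend.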
+name: zext_s16_to_s64_s
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; CHECK-LABEL: name: zext_s16_to_s64_s
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s16)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_ZEXT %1
+...
+
---
name: zext_s32_to_s64_v
legalized: true
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: zext_s32_to_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s64) = G_ZEXT %0
...
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
; CHECK-LABEL: name: zext_s1_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_ZEXT %1
...
+
+---
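+# The VGPR zext is split: zero-extend to 32 bits and use a constant zero for
+# the high half.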
+name: zext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: zext_s16_to_s64_vgpr
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_ZEXT %1
+...