LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
+ LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+
MachineRegisterInfo &MRI;
const LegalizerInfo &LI;
/// To keep track of changes made by the LegalizerHelper.
MI.eraseFromParent();
return Legalized;
}
+ case G_UITOFP:
+ return lowerUITOFP(MI, TypeIdx, Ty);
+ case G_SITOFP:
+ return lowerSITOFP(MI, TypeIdx, Ty);
}
}
}
}
}
+
+// Expand s32 = G_UITOFP s64 by building the IEEE float representation
+// directly with bit operations.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
+
+  // float cul2f(ulong u) {
+ // uint lz = clz(u);
+ // uint e = (u != 0) ? 127U + 63U - lz : 0;
+ // u = (u << lz) & 0x7fffffffffffffffUL;
+ // ulong t = u & 0xffffffffffUL;
+ // uint v = (e << 23) | (uint)(u >> 40);
+ // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+ // return as_float(v + r);
+ // }
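+  //
+  // The high 23 bits of the normalized u become the mantissa of v; the low
+  // 40 bits (t) are discarded, so r supplies a round-to-nearest-even
+  // increment: 1 if t is above the halfway point 0x8000000000, the
+  // mantissa's low bit (v & 1) on an exact tie, and 0 otherwise.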
+
+ auto Zero32 = MIRBuilder.buildConstant(S32, 0);
+ auto Zero64 = MIRBuilder.buildConstant(S64, 0);
+
+ auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
+
+ auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
+ auto Sub = MIRBuilder.buildSub(S32, K, LZ);
+
+ auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
+ auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
+
+ auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
+ auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
+
+ auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
+
+ auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
+ auto T = MIRBuilder.buildAnd(S64, U, Mask1);
+
+ auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
+ auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
+ auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
+
+ auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
+ auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
+ auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
+ auto One = MIRBuilder.buildConstant(S32, 1);
+
+ auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
+ auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
+ auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
+ MIRBuilder.buildAdd(Dst, V, R);
+
+ return Legalized;
+}
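
As a sanity check on the bit-level algorithm, here is a minimal standalone
C++ sketch of the cul2f pseudocode (not part of the patch; the test harness
and the use of C++20 std::countl_zero/std::bit_cast are assumptions), which
can be compared against the compiler's native u64-to-f32 conversion:

#include <bit>
#include <cassert>
#include <cstdint>

// Reference implementation of the cul2f pseudocode above.
static float cul2f(uint64_t u) {
  if (u == 0)
    return 0.0f;                               // the expansion selects e = 0 instead
  unsigned lz = std::countl_zero(u);           // G_CTLZ_ZERO_UNDEF
  uint32_t e = 127u + 63u - lz;                // biased exponent
  u = (u << lz) & 0x7fffffffffffffffull;       // normalize, drop the implicit leading 1
  uint64_t t = u & 0xffffffffffull;            // 40 discarded fraction bits
  uint32_t v = (e << 23) | uint32_t(u >> 40);  // exponent | 23-bit mantissa
  uint32_t r = t > 0x8000000000ull ? 1u
             : t == 0x8000000000ull ? (v & 1u) : 0u; // round to nearest even
  return std::bit_cast<float>(v + r);
}

int main() {
  for (uint64_t x : {uint64_t(0), uint64_t(1), (uint64_t(1) << 40) | 1,
                     ~uint64_t(0)})
    assert(cul2f(x) == float(x));
}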
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (SrcTy != LLT::scalar(64))
+ return UnableToLegalize;
+
+ if (DstTy == LLT::scalar(32)) {
+ // TODO: SelectionDAG has several alternative expansions to port which may
+    // be more reasonable depending on the available instructions. If a target
+ // has sitofp, does not have CTLZ, or can efficiently use f64 as an
+ // intermediate type, this is probably worse.
+ return lowerU64ToF32BitOps(MI);
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ if (SrcTy != S64)
+ return UnableToLegalize;
+
+ if (DstTy == S32) {
+    // float cl2f(long l) {
+ // long s = l >> 63;
+ // float r = cul2f((l + s) ^ s);
+ // return s ? -r : r;
+ // }
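+    //
+    // s is 0 for non-negative l and all-ones for negative l, so (l + s) ^ s
+    // computes |l| without a branch (for negative l it is ~(l - 1) = -l);
+    // the sign is reapplied by the final select after the unsigned conversion.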
+ unsigned L = Src;
+ auto SignBit = MIRBuilder.buildConstant(S64, 63);
+ auto S = MIRBuilder.buildAShr(S64, L, SignBit);
+
+ auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
+ auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
+ auto R = MIRBuilder.buildUITOFP(S32, Xor);
+
+ auto RNeg = MIRBuilder.buildFNeg(S32, R);
+ auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
+ MIRBuilder.buildConstant(S64, 0));
+ MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
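
The signed wrapper in the same style, reusing the hypothetical cul2f sketch
above (again an illustrative sketch, not part of the patch); the addition is
done on uint64_t so the INT64_MIN case cannot overflow a signed add, just as
the G_ADD in the expansion is modular:

static float cl2f(int64_t l) {
  uint64_t s = uint64_t(l >> 63);      // 0 or all-ones sign mask
  uint64_t a = (uint64_t(l) + s) ^ s;  // branchless |l| in two's complement
  float r = cul2f(a);
  return s ? -r : r;                   // reapply the sign, as the G_SELECT does
}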
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalFor({{S32, S32}, {S64, S32}})
+ .lowerFor({{S32, S64}})
.scalarize(0);
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
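
With the added lowerFor rule, s32 = G_SITOFP/G_UITOFP from s64 is marked as
Lower on AMDGPU, so the legalizer invokes the new LegalizerHelper expansions
above; the MIR tests below check the resulting sequences.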
%1:_(<2 x s64>) = G_SITOFP %0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
+
+---
+name: test_sitofp_s64_to_s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: test_sitofp_s64_to_s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+ ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+ ; CHECK: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV]], [[UV2]], [[C1]]
+ ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDE1]]
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE]](s32), [[UADDE2]](s32)
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64)
+ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
+ ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[CTLZ_ZERO_UNDEF]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C3]]
+ ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C2]]
+ ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+ ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C5]]
+ ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
+ ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C6]]
+ ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C7]](s64)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC1]](s32)
+ ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C8]](s32)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC2]]
+ ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
+ ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C9]]
+ ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C9]]
+ ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C10]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C2]]
+ ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C10]], [[SELECT1]]
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
+    ; CHECK: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]]
+    ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C3]]
+    ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[ADD]]
+    ; CHECK: $vgpr0 = COPY [[SELECT3]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SITOFP %0
+ $vgpr0 = COPY %1
+...
$vgpr0_vgpr1 = COPY %1
...
+---
+name: test_uitofp_v2s32_to_v2s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: test_uitofp_v2s32_to_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+ ; CHECK: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UITOFP]](s32), [[UITOFP1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s32>) = G_UITOFP %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_uitofp_s64_to_s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: test_uitofp_s64_to_s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64)
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
+ ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]]
+ ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]]
+ ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+ ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]]
+ ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
+ ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]]
+ ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C5]](s64)
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32)
+ ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC1]]
+ ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
+ ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]]
+ ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]]
+ ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]]
+ ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]]
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
+    ; CHECK: $vgpr0 = COPY [[ADD]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_UITOFP %0
+ $vgpr0 = COPY %1
+...