if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
return None;
- // Now try to match the G_SHL.
- MachineInstr *Shl =
- getOpcodeDef(TargetOpcode::G_SHL, Gep->getOperand(2).getReg(), MRI);
- if (!Shl || !isWorthFoldingIntoExtendedReg(*Shl, MRI))
+ // Now, try to match an opcode which will match our specific offset.
+ // We want a G_SHL or a G_MUL.
+ MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+ if (!OffsetInst)
return None;
- // Now, try to find the specific G_CONSTANT.
- auto ValAndVReg =
- getConstantVRegValWithLookThrough(Shl->getOperand(2).getReg(), MRI);
- if (!ValAndVReg)
+ unsigned OffsetOpc = OffsetInst->getOpcode();
+ if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
return None;
+ if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+ return None;
+
+ // Now, try to find the specific G_CONSTANT. Start by assuming that the
+ // register we will offset is the LHS, and the register containing the
+ // constant is the RHS.
+ Register OffsetReg = OffsetInst->getOperand(1).getReg();
+ Register ConstantReg = OffsetInst->getOperand(2).getReg();
+ auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ if (!ValAndVReg) {
+ // We didn't get a constant on the RHS. If the opcode is a shift, then
+ // we're done.
+ if (OffsetOpc == TargetOpcode::G_SHL)
+ return None;
+
+ // If we have a G_MUL, we can use either register. Try looking at the RHS.
+ std::swap(OffsetReg, ConstantReg);
+ ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ if (!ValAndVReg)
+ return None;
+ }
+
// The value must fit into 3 bits, and must be positive. Make sure that is
// true.
int64_t ImmVal = ValAndVReg->Value;
+
+ // Since we're going to pull this into a shift, the constant value must be
+ // a power of 2. If we got a multiply, then we need to check this.
+ // Note: ImmVal is 64 bits wide; use the 64-bit helpers here. The 32-bit
+ // variants would silently truncate, so a constant like (1 << 32) | 16
+ // would wrongly look like a power of 2 and be mis-folded as a shift.
+ if (OffsetOpc == TargetOpcode::G_MUL) {
+ if (!isPowerOf2_64(ImmVal))
+ return None;
+
+ // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
+ ImmVal = Log2_64(ImmVal);
+ }
+
if ((ImmVal & 0x7) != ImmVal)
return None;
// offset. Signify that we are shifting by setting the shift flag to 1.
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
- [=](MachineInstrBuilder &MIB) { MIB.add(Shl->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
}};
define void @more_than_one_use(i64* %addr) { ret void }
define void @ldrxrox_shl(i64* %addr) { ret void }
define void @ldrdrox_shl(i64* %addr) { ret void }
+ define void @ldrxrox_mul_rhs(i64* %addr) { ret void }
+ define void @ldrdrox_mul_rhs(i64* %addr) { ret void }
+ define void @ldrxrox_mul_lhs(i64* %addr) { ret void }
+ define void @ldrdrox_mul_lhs(i64* %addr) { ret void }
+ define void @mul_not_pow_2(i64* %addr) { ret void }
+ define void @mul_wrong_pow_2(i64* %addr) { ret void }
define void @more_than_one_use_shl_1(i64* %addr) { ret void }
define void @more_than_one_use_shl_2(i64* %addr) { ret void }
define void @more_than_one_use_shl_lsl_fast(i64* %addr) #1 { ret void }
$d2 = COPY %5(s64)
RET_ReallyLR implicit $d2
+...
+---
+name: ldrxrox_mul_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Show that a G_MUL by a power of 2 (8) with the constant on the RHS is
+ ; folded into the GPR load as a shifted register offset. (The shift flag
+ ; on the LDRXroX is 1.)
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: ldrxrox_mul_rhs
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+ ; CHECK: $x2 = COPY [[LDRXroX]]
+ ; CHECK: RET_ReallyLR implicit $x2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 8
+ %2:gpr(s64) = G_MUL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_GEP %3, %2
+ %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+ $x2 = COPY %5(s64)
+ RET_ReallyLR implicit $x2
+
+...
+---
+name: ldrdrox_mul_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Same as ldrxrox_mul_rhs, but loading into an FPR destination: the
+ ; G_MUL by 8 (constant on the RHS) folds into an LDRDroX with the shift
+ ; flag set to 1.
+ liveins: $x0, $x1, $d2
+ ; CHECK-LABEL: name: ldrdrox_mul_rhs
+ ; CHECK: liveins: $x0, $x1, $d2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+ ; CHECK: $d2 = COPY [[LDRDroX]]
+ ; CHECK: RET_ReallyLR implicit $d2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 8
+ %2:gpr(s64) = G_MUL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_GEP %3, %2
+ %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+ $d2 = COPY %5(s64)
+ RET_ReallyLR implicit $d2
+
+...
+---
+name: ldrxrox_mul_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; G_MUL is commutative: with the power-of-2 constant (8) on the LHS, the
+ ; selector should still fold the multiply into an LDRXroX with the shift
+ ; flag set to 1.
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: ldrxrox_mul_lhs
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+ ; CHECK: $x2 = COPY [[LDRXroX]]
+ ; CHECK: RET_ReallyLR implicit $x2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 8
+ %2:gpr(s64) = G_MUL %1, %0(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_GEP %3, %2
+ %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+ $x2 = COPY %5(s64)
+ RET_ReallyLR implicit $x2
+
+...
+---
+name: ldrdrox_mul_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Same as ldrxrox_mul_lhs (constant 8 on the LHS of the G_MUL), but with
+ ; an FPR destination: the fold should produce an LDRDroX with the shift
+ ; flag set to 1.
+ liveins: $x0, $x1, $d2
+ ; CHECK-LABEL: name: ldrdrox_mul_lhs
+ ; CHECK: liveins: $x0, $x1, $d2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+ ; CHECK: $d2 = COPY [[LDRDroX]]
+ ; CHECK: RET_ReallyLR implicit $d2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 8
+ %2:gpr(s64) = G_MUL %1, %0(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_GEP %3, %2
+ %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+ $d2 = COPY %5(s64)
+ RET_ReallyLR implicit $d2
+
+...
+---
+name: mul_not_pow_2
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Show that we don't get a shifted load from a mul when the constant (7)
+ ; is not a power of 2, and so cannot be represented as a shift amount.
+ ; The multiply stays as a MADDXrrr and the shift flag on the load is 0.
+ liveins: $x0, $x1, $d2
+ ; CHECK-LABEL: name: mul_not_pow_2
+ ; CHECK: liveins: $x0, $x1, $d2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 7
+ ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[MOVi64imm]], [[COPY]], $xzr
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr)
+ ; CHECK: $d2 = COPY [[LDRDroX]]
+ ; CHECK: RET_ReallyLR implicit $d2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 7
+ %2:gpr(s64) = G_MUL %1, %0(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_GEP %3, %2
+ %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+ $d2 = COPY %5(s64)
+ RET_ReallyLR implicit $d2
+
+...
+---
+name: mul_wrong_pow_2
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ ; Show that we don't get a shifted load from a mul by the wrong power of
+ ; 2: 16 would need a shift of 4, but an 8-byte load can only fold a shift
+ ; of 3 (log2 of the access size), so the multiply is kept as a MADDXrrr
+ ; and the shift flag on the load is 0.
+ liveins: $x0, $x1, $d2
+ ; CHECK-LABEL: name: mul_wrong_pow_2
+ ; CHECK: liveins: $x0, $x1, $d2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 16
+ ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[MOVi64imm]], [[COPY]], $xzr
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr)
+ ; CHECK: $d2 = COPY [[LDRDroX]]
+ ; CHECK: RET_ReallyLR implicit $d2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 16
+ %2:gpr(s64) = G_MUL %1, %0(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_GEP %3, %2
+ %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+ $d2 = COPY %5(s64)
+ RET_ReallyLR implicit $d2
+
...
---
name: more_than_one_use_shl_1