if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
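+ // Also combine SHL on Thumb1, so PerformShiftCombine below can rewrite
+ // (shl (and x, m), c) into a cheaper lsls+lsrs pair.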
+ if (Subtarget->isThumb1Only())
+ setTargetDAGCombine(ISD::SHL);
setStackPointerRegisterToSaveRestore(ARM::SP);
if (Level == BeforeLegalizeTypes)
return true;
- if (Subtarget->isThumb() && Subtarget->isThumb1Only())
+ if (N->getOpcode() != ISD::SHL)
return true;
- if (N->getOpcode() != ISD::SHL)
+ if (Subtarget->isThumb1Only()) {
+ // Avoid making expensive immediates by commuting shifts. (This logic
+ // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
+ // for free.)
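+ // E.g. commuting (shl (add x, 200), 4) to (add (shl x, 4), 3200) turns
+ // a legal 8-bit immediate (200) into one Thumb1 has to materialize (3200).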
+ SDValue N1 = N->getOperand(0);
+ if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
+ N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
+ return true;
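+ // Immediates in [0, 255], and for ADD also in [-255, -1] (a subs),
+ // already fit Thumb1's 8-bit encodings; commuting would shift them
+ // left, likely pushing them out of that range.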
+ if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
+ if (Const->getAPIntValue().ult(256))
+ return false;
+ if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
+ Const->getAPIntValue().sgt(-256))
+ return false;
+ }
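+ // All other immediates are valid, or could be valid with some
+ // modification, so commuting is still worthwhile.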
return true;
+ }
// Turn off commute-with-shift transform after legalization, so it doesn't
// conflict with PerformSHLSimplify. (We could try to detect when
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
-static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue PerformShiftCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
+ SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
// Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
}
}
+ if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
+ N->getOperand(0)->getOpcode() == ISD::AND &&
+ N->getOperand(0)->hasOneUse()) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+ // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
+ // usually show up because instcombine prefers to canonicalize it to
+ // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
+ // out of GEP lowering in some cases.
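+ // E.g. a GEP "getelementptr i32, i32* %p, i32 %idx" with a masked index
+ // scales %idx by 4, yielding (shl (and %idx, AndMask), 2).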
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShiftAmtNode)
+ return SDValue();
+ uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
+ ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!AndMaskNode)
+ return SDValue();
+ uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
+ // Don't transform uxtb/uxth.
+ if (AndMask == 255 || AndMask == 65535)
+ return SDValue();
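+ // (Masks of 0xFF/0xFFFF match the single-instruction uxtb/uxth patterns
+ // where available, so two shifts would be no improvement.)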
+ if (isMask_32(AndMask)) {
+ uint32_t MaskedBits = countLeadingZeros(AndMask);
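+ // MaskedBits counts the high bits cleared by the mask, e.g. 28 for
+ // AndMask == 15; (shl (and x, 15), 2) then becomes
+ // (srl (shl x, 28), 26), an lsls+lsrs pair with no mask constant.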
+ if (MaskedBits > ShiftAmt) {
+ SDLoc DL(N);
+ SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(MaskedBits, DL, MVT::i32));
+ return DAG.getNode(
+ ISD::SRL, DL, MVT::i32, SHL,
+ DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
+ }
+ }
+ }
+
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SRL:
+ return PerformShiftCombine(N, DCI, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
%shl = shl i32 %shr, 3
ret i32 %shl
}
+
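+; (shl (and x, 15), 2) should become lsls #28 + lsrs #26 instead of
+; materializing the mask in a register.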
+define i32 @test16(i32 %x) {
+; CHECK-LABEL: test16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsls r0, r0, #28
+; CHECK-NEXT: lsrs r0, r0, #26
+; CHECK-NEXT: bx lr
+entry:
+ %0 = and i32 %x, 15
+ %shl = shl i32 %0, 2
+ ret i32 %shl
+}
+
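+; Same pattern, but the shift comes from GEP lowering (i32 index scaled by 4).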
+define i32* @test17(i32* %p, i32 %x) {
+; CHECK-LABEL: test17:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsls r1, r1, #28
+; CHECK-NEXT: lsrs r1, r1, #26
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = and i32 %x, 15
+ %shl = getelementptr i32, i32* %p, i32 %0
+ ret i32* %shl
+}
+
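+; The add keeps its cheap 8-bit immediate; only the mask folds into the shifts.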
+define i32* @test18(i32* %p, i32 %x) {
+; CHECK-LABEL: test18:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adds r1, r1, #1
+; CHECK-NEXT: lsls r1, r1, #28
+; CHECK-NEXT: lsrs r1, r1, #26
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: bx lr
+entry:
+ %xx = add i32 %x, 1
+ %0 = and i32 %xx, 15
+ %shl = getelementptr i32, i32* %p, i32 %0
+ ret i32* %shl
+}
+
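+; As test18, but with a sub.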
+define i32* @test19(i32* %p, i32 %x) {
+; CHECK-LABEL: test19:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: subs r1, r1, #1
+; CHECK-NEXT: lsls r1, r1, #28
+; CHECK-NEXT: lsrs r1, r1, #26
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: bx lr
+entry:
+ %xx = sub i32 %x, 1
+ %0 = and i32 %xx, 15
+ %shl = getelementptr i32, i32* %p, i32 %0
+ ret i32* %shl
+}
+
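+; (x + 15) == (x - 1) modulo 16, so under the 4-bit mask this becomes subs #1.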
+define i32* @test20(i32* %p, i32 %x) {
+; CHECK-LABEL: test20:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: subs r1, r1, #1
+; CHECK-NEXT: lsls r1, r1, #28
+; CHECK-NEXT: lsrs r1, r1, #26
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: bx lr
+entry:
+ %xx = add i32 %x, 15
+ %0 = and i32 %xx, 15
+ %shl = getelementptr i32, i32* %p, i32 %0
+ ret i32* %shl
+}