/// Try to combine the compare into a call to the llvm.uadd.with.overflow
/// intrinsic. Return true if any changes were made.
-static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI) {
- // TODO: Why is this transform limited by this condition?
- if (TLI.hasMultipleConditionRegisters())
- return false;
-
+static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
+ const DataLayout &DL) {
Value *A, *B;
Instruction *AddI;
if (!match(Cmp,
m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
return false;
+ // Allow the transform as long as we have an integer type that is not
+ // obviously illegal and unsupported, i.e. only reject types that do not
+ // map to a simple VT and for which ISD::UADDO would be expanded anyway.
Type *Ty = AddI->getType();
if (!isa<IntegerType>(Ty))
return false;
+ EVT CodegenVT = TLI.getValueType(DL, Ty);
+ if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT))
+ return false;
// We don't want to move around uses of condition values this late, so we
// check if it is legal to create the call to the intrinsic in the basic
// block below the icmp.
return MadeChange;
}
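
For reference, here is a minimal IR sketch of the rewrite this combine performs (function and value names are illustrative, not taken from the patch). The matcher recognizes an add whose result is compared against one of the add's operands with an unsigned-overflow idiom such as icmp ugt:

; Before: the overflow check is spelled as a compare against the sum.
define i1 @overflow_check(i64 %a, i64 %b) {
  %add = add i64 %a, %b
  %ov = icmp ugt i64 %a, %add      ; true iff %add wrapped
  ret i1 %ov
}

; After the combine (conceptually): the add and compare fold into the
; intrinsic, whose second result is the overflow bit.
define i1 @overflow_check(i64 %a, i64 %b) {
  %res = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %ov = extractvalue { i64, i1 } %res, 1
  ret i1 %ov
}

declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
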
-static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
+ const DataLayout &DL) {
if (sinkCmpExpression(Cmp, TLI))
return true;
- if (combineToUAddWithOverflow(Cmp, TLI))
+ if (combineToUAddWithOverflow(Cmp, TLI, DL))
return true;
return false;
}
if (CmpInst *CI = dyn_cast<CmpInst>(I))
- if (TLI && optimizeCmpExpression(CI, *TLI))
+ if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
return true;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi 5, 3, 42
; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31
+; CHECK-NEXT: addi 3, 3, 42
+; CHECK-NEXT: andi. 4, 3, 256
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: clrlwi 6, 5, 24
-; CHECK-NEXT: cmplw 3, 6
-; CHECK-NEXT: isel 3, 4, 5, 1
+; CHECK-NEXT: isel 3, 3, 4, 2
; CHECK-NEXT: blr
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
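
The test diffs here show only the head of each function; the trailing select and ret were trimmed. Below is a sketch of the full body being exercised, assuming the conventional unsigned-saturation tail (select of all-ones on overflow); the variable-operand tests later in the file follow the same shape with %y in place of the constant:

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a          ; true iff the i8 add wrapped
  %r = select i1 %c, i8 -1, i8 %a  ; clamp to the maximum value on overflow
  ret i8 %r
}
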
define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi 5, 3, 42
; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31
+; CHECK-NEXT: addi 3, 3, 42
+; CHECK-NEXT: andis. 4, 3, 1
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: clrlwi 6, 5, 16
-; CHECK-NEXT: cmplw 3, 6
-; CHECK-NEXT: isel 3, 4, 5, 1
+; CHECK-NEXT: isel 3, 3, 4, 2
; CHECK-NEXT: blr
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
; CHECK: # %bb.0:
; CHECK-NEXT: addi 5, 3, 42
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: cmplw 0, 3, 5
-; CHECK-NEXT: isel 3, 4, 5, 1
+; CHECK-NEXT: cmplw 0, 5, 3
+; CHECK-NEXT: isel 3, 4, 5, 0
; CHECK-NEXT: blr
%a = add i32 %x, 42
%c = icmp ugt i32 %x, %a
; CHECK: # %bb.0:
; CHECK-NEXT: addi 5, 3, 42
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: cmpld 3, 5
-; CHECK-NEXT: isel 3, 4, 5, 1
+; CHECK-NEXT: cmpld 5, 3
+; CHECK-NEXT: isel 3, 4, 5, 0
; CHECK-NEXT: blr
%a = add i64 %x, 42
%c = icmp ugt i64 %x, %a
define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: add 4, 3, 4
+; CHECK-NEXT: rlwinm 4, 4, 0, 24, 31
; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31
-; CHECK-NEXT: li 5, -1
-; CHECK-NEXT: clrlwi 6, 4, 24
-; CHECK-NEXT: cmplw 3, 6
-; CHECK-NEXT: isel 3, 5, 4, 1
+; CHECK-NEXT: add 3, 3, 4
+; CHECK-NEXT: andi. 4, 3, 256
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: isel 3, 3, 4, 2
; CHECK-NEXT: blr
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: add 4, 3, 4
+; CHECK-NEXT: rlwinm 4, 4, 0, 16, 31
; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31
-; CHECK-NEXT: li 5, -1
-; CHECK-NEXT: clrlwi 6, 4, 16
-; CHECK-NEXT: cmplw 3, 6
-; CHECK-NEXT: isel 3, 5, 4, 1
+; CHECK-NEXT: add 3, 3, 4
+; CHECK-NEXT: andis. 4, 3, 1
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: isel 3, 3, 4, 2
; CHECK-NEXT: blr
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
; CHECK: # %bb.0:
; CHECK-NEXT: add 4, 3, 4
; CHECK-NEXT: li 5, -1
-; CHECK-NEXT: cmplw 0, 3, 4
-; CHECK-NEXT: isel 3, 5, 4, 1
+; CHECK-NEXT: cmplw 0, 4, 3
+; CHECK-NEXT: isel 3, 5, 4, 0
; CHECK-NEXT: blr
%a = add i32 %x, %y
%c = icmp ugt i32 %x, %a
; CHECK: # %bb.0:
; CHECK-NEXT: add 4, 3, 4
; CHECK-NEXT: li 5, -1
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: isel 3, 5, 4, 1
+; CHECK-NEXT: cmpld 4, 3
+; CHECK-NEXT: isel 3, 5, 4, 0
; CHECK-NEXT: blr
%a = add i64 %x, %y
%c = icmp ugt i64 %x, %a