[CGP] adjust target constraints for forming uaddo

author Sanjay Patel <spatel@rotateright.com>

Sun, 3 Feb 2019 17:53:09 +0000 (17:53 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Sun, 3 Feb 2019 17:53:09 +0000 (17:53 +0000)
author Sanjay Patel <spatel@rotateright.com>
Sun, 3 Feb 2019 17:53:09 +0000 (17:53 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Sun, 3 Feb 2019 17:53:09 +0000 (17:53 +0000)
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp

index 792e4a537eaa87d1807850e99771dfd0a0595ed6..bcb899a9e02ed089f686bcf31fec1117f1e3d794 100644 (file)
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1149,20 +1149,22 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
  
  /// Try to combine the compare into a call to the llvm.uadd.with.overflow
  /// intrinsic. Return true if any changes were made.
-static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI) {
-  // TODO: Why is this transform limited by this condition?
-  if (TLI.hasMultipleConditionRegisters())
-    return false;
-
+static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
+                                      const DataLayout &DL) {
    Value *A, *B;
    Instruction *AddI;
    if (!match(Cmp,
               m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
      return false;
  
+  // Allow the transform as long as we have an integer type that is not
+  // obviously illegal and unsupported.
    Type *Ty = AddI->getType();
    if (!isa<IntegerType>(Ty))
      return false;
+  EVT CodegenVT = TLI.getValueType(DL, Ty);
+  if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT))
+    return false;
  
    // We don't want to move around uses of condition values this late, so we
    // check if it is legal to create the call to the intrinsic in the basic
@@ -1263,11 +1265,12 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
    return MadeChange;
  }
  
-static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
+                                  const DataLayout &DL) {
    if (sinkCmpExpression(Cmp, TLI))
      return true;
  
-  if (combineToUAddWithOverflow(Cmp, TLI))
+  if (combineToUAddWithOverflow(Cmp, TLI, DL))
      return true;
  
    return false;
@@ -6714,7 +6717,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
    }
  
    if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (TLI && optimizeCmpExpression(CI, *TLI))
+    if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
        return true;
  
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
diff --git a/test/CodeGen/PowerPC/sat-add.ll b/test/CodeGen/PowerPC/sat-add.ll

index 515ddfe976cf93c23874f6ecdb3b8bda07848164..7f4e1b8cc2acd31ce3da514b5e7570509461ba0f 100644 (file)
--- a/test/CodeGen/PowerPC/sat-add.ll
+++ b/test/CodeGen/PowerPC/sat-add.ll
@@ -24,12 +24,11 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
  define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
  ; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi 5, 3, 42
  ; CHECK-NEXT:    rlwinm 3, 3, 0, 24, 31
+; CHECK-NEXT:    addi 3, 3, 42
+; CHECK-NEXT:    andi. 4, 3, 256
  ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    clrlwi 6, 5, 24
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    isel 3, 3, 4, 2
  ; CHECK-NEXT:    blr
    %a = add i8 %x, 42
    %c = icmp ugt i8 %x, %a
@@ -70,12 +69,11 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
  define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
  ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi 5, 3, 42
  ; CHECK-NEXT:    rlwinm 3, 3, 0, 16, 31
+; CHECK-NEXT:    addi 3, 3, 42
+; CHECK-NEXT:    andis. 4, 3, 1
  ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    clrlwi 6, 5, 16
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    isel 3, 3, 4, 2
  ; CHECK-NEXT:    blr
    %a = add i16 %x, 42
    %c = icmp ugt i16 %x, %a
@@ -117,8 +115,8 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    addi 5, 3, 42
  ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    cmplw 0, 3, 5
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    cmplw 0, 5, 3
+; CHECK-NEXT:    isel 3, 4, 5, 0
  ; CHECK-NEXT:    blr
    %a = add i32 %x, 42
    %c = icmp ugt i32 %x, %a
@@ -160,8 +158,8 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    addi 5, 3, 42
  ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    cmpld 3, 5
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    cmpld 5, 3
+; CHECK-NEXT:    isel 3, 4, 5, 0
  ; CHECK-NEXT:    blr
    %a = add i64 %x, 42
    %c = icmp ugt i64 %x, %a
@@ -204,12 +202,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
  define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
  ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    rlwinm 4, 4, 0, 24, 31
  ; CHECK-NEXT:    rlwinm 3, 3, 0, 24, 31
-; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    clrlwi 6, 4, 24
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    andi. 4, 3, 256
+; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    isel 3, 3, 4, 2
  ; CHECK-NEXT:    blr
    %a = add i8 %x, %y
    %c = icmp ugt i8 %x, %a
@@ -255,12 +253,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
  define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
  ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    rlwinm 4, 4, 0, 16, 31
  ; CHECK-NEXT:    rlwinm 3, 3, 0, 16, 31
-; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    clrlwi 6, 4, 16
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    andis. 4, 3, 1
+; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    isel 3, 3, 4, 2
  ; CHECK-NEXT:    blr
    %a = add i16 %x, %y
    %c = icmp ugt i16 %x, %a
@@ -306,8 +304,8 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    add 4, 3, 4
  ; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    cmplw 0, 3, 4
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    cmplw 0, 4, 3
+; CHECK-NEXT:    isel 3, 5, 4, 0
  ; CHECK-NEXT:    blr
    %a = add i32 %x, %y
    %c = icmp ugt i32 %x, %a
@@ -351,8 +349,8 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    add 4, 3, 4
  ; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    cmpld 4, 3
+; CHECK-NEXT:    isel 3, 5, 4, 0
  ; CHECK-NEXT:    blr
    %a = add i64 %x, %y
    %c = icmp ugt i64 %x, %a
diff --git a/test/CodeGen/X86/codegen-prepare-uaddo.ll b/test/CodeGen/X86/codegen-prepare-uaddo.ll

index dbf32f0782fb461c07bac727ad6af2664e7cdb99..2bc13cc57d2cebdcf6acc544ed96f6cc9207d9a7 100644 (file)
--- a/test/CodeGen/X86/codegen-prepare-uaddo.ll
+++ b/test/CodeGen/X86/codegen-prepare-uaddo.ll
@@ -252,15 +252,14 @@ define void @test_18446744073709551615(i64*, i64*) {
  define i1 @illegal_type(i17 %x, i17* %p) {
  ; CHECK-LABEL: illegal_type:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $131071, %edi # imm = 0x1FFFF
  ; CHECK-NEXT:    addl $29, %edi
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    andl $131071, %ecx # imm = 0x1FFFF
-; CHECK-NEXT:    cmpl %edi, %ecx
-; CHECK-NEXT:    setne %al
  ; CHECK-NEXT:    movw %di, (%rsi)
-; CHECK-NEXT:    shrl $16, %ecx
-; CHECK-NEXT:    movb %cl, 2(%rsi)
+; CHECK-NEXT:    andl $131071, %edi # imm = 0x1FFFF
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrl $16, %eax
+; CHECK-NEXT:    movb %al, 2(%rsi)
+; CHECK-NEXT:    cmpl $29, %edi
+; CHECK-NEXT:    setb %al
  ; CHECK-NEXT:    retq
    %a = add i17 %x, 29
    store i17 %a, i17* %p
diff --git a/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll

index 6b91a3b3c189d856cdcfd6b7f1582ac97bc56256..6be9661cc63b3c5a2060b4c8244d18894a1e03f6 100644 (file)
--- a/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -163,11 +163,10 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
  
  define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) {
  ; CHECK-LABEL: @uaddo_i42_increment_illegal_type(
-; CHECK-NEXT:    [[UADD_OVERFLOW:%.*]] = call { i42, i1 } @llvm.uadd.with.overflow.i42(i42 [[X:%.*]], i42 1)
-; CHECK-NEXT:    [[UADD:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 0
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 1
-; CHECK-NEXT:    store i42 [[UADD]], i42* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OVERFLOW]]
+; CHECK-NEXT:    [[A:%.*]] = add i42 [[X:%.*]], 1
+; CHECK-NEXT:    [[OV:%.*]] = icmp eq i42 [[A]], 0
+; CHECK-NEXT:    store i42 [[A]], i42* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV]]
  ;
    %a = add i42 %x, 1
    %ov = icmp eq i42 %a, 0
author	Sanjay Patel <spatel@rotateright.com>
	Sun, 3 Feb 2019 17:53:09 +0000 (17:53 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Sun, 3 Feb 2019 17:53:09 +0000 (17:53 +0000)
lib/CodeGen/CodeGenPrepare.cpp		patch | blob | history
test/CodeGen/PowerPC/sat-add.ll		patch | blob | history
test/CodeGen/X86/codegen-prepare-uaddo.ll		patch | blob | history
test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll		patch | blob | history