return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
}
+// Returns true if the given Op is the overflow flag result of an overflow
+// intrinsic operation.
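+// Such nodes produce two results: result 0 is the arithmetic value and
+// result 1 is the i1 overflow flag, hence the getResNo() == 1 check below.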
+static bool isOverflowIntrOpRes(SDValue Op) {
+ unsigned Opc = Op.getOpcode();
+ return (Op.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
+}
+
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
+ SDLoc dl(Sel);
+ // If the operand is an overflow checking operation, invert the condition
+ // code and kill the Not operation. I.e., transform:
+ // (xor (overflow_op_bool), 1)
+ // -->
+ // (csel 1, 0, invert(cc), overflow_op_bool)
+ // ... which later gets transformed to just a cset instruction with an
+ // inverted condition code, rather than a cset + eor sequence.
+ if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
+ return SDValue();
+
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+ AArch64CC::CondCode CC;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
+ SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+ return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
+ CCVal, Overflow);
+ }
// If neither operand is a SELECT_CC, give up.
if (Sel.getOpcode() != ISD::SELECT_CC)
std::swap(Sel, Other);
SDValue RHS = Sel.getOperand(1);
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
- SDLoc dl(Sel);
// FIXME: This could be generalized to non-integer comparisons.
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
- unsigned Opc = LHS.getOpcode();
- if (LHS.getResNo() == 1 && isOneConstant(RHS) &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS)) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
"Unexpected condition code.");
// Only lower legal XALUO ops.
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
- unsigned Opc = CCVal.getOpcode();
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
- if (CCVal.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ if (isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
ret i32 %ret
}
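+; The *.not.* tests below exercise the new xor fold: the inverted overflow
+; flag should lower to a single cset with the inverted condition code,
+; rather than a cset + eor sequence.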
+define i1 @saddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: cset w0, vc
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
entry:
; CHECK-LABEL: saddo.select.i64
ret i64 %ret
}
+define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: cset w0, vc
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
entry:
; CHECK-LABEL: uaddo.select.i32
ret i32 %ret
}
+define i1 @uaddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: cset w0, lo
+ %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
entry:
; CHECK-LABEL: uaddo.select.i64
ret i64 %ret
}
+define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: cset w0, lo
+ %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i32 @ssubo.select.i32(i32 %v1, i32 %v2) {
entry:
; CHECK-LABEL: ssubo.select.i32
ret i32 %ret
}
+define i1 @ssubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: ssubo.not.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, vc
+ %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
entry:
; CHECK-LABEL: ssubo.select.i64
ret i64 %ret
}
+define i1 @ssubo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: ssubo.not.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset w0, vc
+ %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
entry:
; CHECK-LABEL: usubo.select.i32
ret i32 %ret
}
+define i1 @usubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, hs
+ %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
entry:
; CHECK-LABEL: usubo.select.i64
ret i64 %ret
}
+define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset w0, hs
+ %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i32 @smulo.select.i32(i32 %v1, i32 %v2) {
entry:
; CHECK-LABEL: smulo.select.i32
ret i32 %ret
}
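+; For the multiply tests the overflow flag comes from an explicit compare of
+; the high bits rather than from the flags of the arithmetic instruction
+; itself, so the inverted flag should materialize as a cset with the eq
+; condition.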
+define i1 @smulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i32
+; CHECK: smull x[[MREG:[0-9]+]], w0, w1
+; CHECK-NEXT: lsr x[[SREG:[0-9]+]], x[[MREG]], #32
+; CHECK-NEXT: cmp w[[SREG]], w[[MREG]], asr #31
+; CHECK-NEXT: cset w0, eq
+ %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
entry:
; CHECK-LABEL: smulo.select.i64
ret i64 %ret
}
+define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i64
+; CHECK: mul [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: cmp [[HREG]], [[MREG]], asr #63
+; CHECK-NEXT: cset w0, eq
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
entry:
; CHECK-LABEL: umulo.select.i32
ret i32 %ret
}
+define i1 @umulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i32
+; CHECK: umull [[MREG:x[0-9]+]], w0, w1
+; CHECK-NEXT: cmp xzr, [[MREG]], lsr #32
+; CHECK-NEXT: cset w0, eq
+ %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
entry:
; CHECK-LABEL: umulo.select.i64
ret i64 %ret
}
+define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i64
+; CHECK: umulh [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: cmp xzr, [[MREG]]
+; CHECK-NEXT: cset w0, eq
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = xor i1 %obit, true
+ ret i1 %ret
+}
+
;
; Check the use of the overflow bit in combination with a branch instruction.