bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt);
+ bool isZExt, bool isEquality);
bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
}
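
The hunks above and below thread a new isEquality flag from the IR compare into ARMEmitCmp. For orientation, a hedged sketch of the predicate set that CmpInst::isEquality() reports true for on FP compares (mirroring FCmpInst::isEquality in llvm/IR/Instructions.h; illustrative, not the exact upstream code):

    // The four FP predicates classified as equality: exactly the ones this
    // patch lowers to the non-signaling VCMP encoding.
    static bool isFPEquality(CmpInst::Predicate P) {
      return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
             P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
    }
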
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt) {
+ bool isZExt, bool isEquality) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
// TODO: Verify compares.
case MVT::f32:
isICmp = false;
- CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+ // Equality comparisons shouldn't raise Invalid on unordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
+ else
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
isICmp = false;
- CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
+ // Equality comparisons shouldn't raise Invalid on unordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
+ else
+ CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
case MVT::i1:
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
// Now set a register based on the comparison. Explicitly set the predicates
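
Why equality must not signal, as a host-side C++ sketch (assumes a compiler and environment that honor FENV_ACCESS; on ARM, after this patch, the == below selects vcmp while the < keeps vcmpe):

    #include <cfenv>
    #include <cstdio>
    #include <limits>

    int main() {
      volatile float qnan = std::numeric_limits<float>::quiet_NaN();
      volatile float one = 1.0f;

      std::feclearexcept(FE_ALL_EXCEPT);
      volatile bool eq = qnan == one;  // equality: quiet, no Invalid expected
      std::printf("== raised Invalid: %d\n", std::fetestexcept(FE_INVALID) != 0);

      std::feclearexcept(FE_ALL_EXCEPT);
      volatile bool lt = qnan < one;   // relational: signaling, Invalid expected
      std::printf("<  raised Invalid: %d\n", std::fetestexcept(FE_INVALID) != 0);
      (void)eq; (void)lt;
      return 0;
    }

Before this change the fast-isel path used the signaling vcmpe for both, so the == case spuriously raised Invalid on quiet-NaN input.
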
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
- ARMCC::CondCodes &CondCode2) {
+ ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
CondCode2 = ARMCC::AL;
+ InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
- case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETOEQ:
+ CondCode = ARMCC::EQ;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
- case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETONE:
+ CondCode = ARMCC::MI;
+ CondCode2 = ARMCC::GT;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
- case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUEQ:
+ CondCode = ARMCC::EQ;
+ CondCode2 = ARMCC::VS;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
case ISD::SETULT: CondCode = ARMCC::LT; break;
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
- case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ case ISD::SETUNE:
+ CondCode = ARMCC::NE;
+ InvalidOnQNaN = false;
+ break;
}
}
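
A worked example of the new out-parameter, using values straight from the switch above:

    // fcmp one (SETONE) has no single ARM condition, so it needs two
    // predicated ops; it is also equality-class, so InvalidOnQNaN is false.
    ARMCC::CondCodes CondCode, CondCode2;
    bool InvalidOnQNaN;
    FPCCToARMCC(ISD::SETONE, CondCode, CondCode2, InvalidOnQNaN);
    // CondCode == ARMCC::MI, CondCode2 == ARMCC::GT, InvalidOnQNaN == false:
    // the caller emits a non-signaling VCMP and tests MI, then GT (compare
    // the movmi/movgt pair in the cmp_f_one test near the end of this patch).
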
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const SDLoc &dl) const {
+ SelectionDAG &DAG, const SDLoc &dl,
+ bool InvalidOnQNaN) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
+ SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
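
The flag travels as an ordinary i32 constant operand, so later stages can read it back; a hypothetical consumer sketch (FMStat is an assumed local naming the node getVFPCmp returns):

    // Operand 2 of ARMISD::CMPFP (operand 1 of CMPFPw0) is the i32
    // InvalidOnQNaN constant that the TableGen patterns below match
    // as (i32 1) versus (i32 0).
    SDValue CmpFP = FMStat.getOperand(0);  // the glued CMPFP / CMPFPw0
    unsigned FlagIdx = CmpFP.getOpcode() == ARMISD::CMPFPw0 ? 1 : 2;
    bool InvalidOnQNaN =
        cast<ConstantSDNode>(CmpFP.getOperand(FlagIdx))->getZExtValue() != 0;
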
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
                                TrueVal.getValueType() == MVT::f64)) {
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- const SDLoc &dl) const;
+ const SDLoc &dl, bool InvalidOnQNaN) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
SDTCisVT<2, i32>]>;
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
//
//===----------------------------------------------------------------------===//
-def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
- [/* For disassembly only; pattern left blank */]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd)]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
%4 = fcmp une float %3, 0.000000e+00
; ARM: ldr r[[R:[0-9]+]], [r0, #2]
; ARM: vmov s0, r[[R]]
-; ARM: vcmpe.f32 s0, #0
+; ARM: vcmp.f32 s0, #0
; THUMB: ldr.w r[[R:[0-9]+]], [r0, #2]
; THUMB: vmov s0, r[[R]]
-; THUMB: vcmpe.f32 s0, #0
+; THUMB: vcmp.f32 s0, #0
ret i1 %4
}
; ARM: t1a
; THUMB: t1a
%cmp = fcmp oeq float %a, 0.000000e+00
-; ARM: vcmpe.f32 s{{[0-9]+}}, #0
-; THUMB: vcmpe.f32 s{{[0-9]+}}, #0
+; ARM: vcmp.f32 s{{[0-9]+}}, #0
+; THUMB: vcmp.f32 s{{[0-9]+}}, #0
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; THUMB: t1b
%cmp = fcmp oeq float %a, -0.000000e+00
; ARM: vldr
-; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+; ARM: vcmp.f32 s{{[0-9]+}}, s{{[0-9]+}}
; THUMB: vldr
-; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+; THUMB: vcmp.f32 s{{[0-9]+}}, s{{[0-9]+}}
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; ARM: t2a
; THUMB: t2a
%cmp = fcmp oeq double %a, 0.000000e+00
-; ARM: vcmpe.f64 d{{[0-9]+}}, #0
-; THUMB: vcmpe.f64 d{{[0-9]+}}, #0
+; ARM: vcmp.f64 d{{[0-9]+}}, #0
+; THUMB: vcmp.f64 d{{[0-9]+}}, #0
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; THUMB: t2b
%cmp = fcmp oeq double %a, -0.000000e+00
; ARM: vldr
-; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+; ARM: vcmp.f64 d{{[0-9]+}}, d{{[0-9]+}}
; THUMB: vldr
-; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+; THUMB: vcmp.f64 d{{[0-9]+}}, d{{[0-9]+}}
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
ret void
}
-; Test only two variants of fcmp. These get translated to f32 vcmpe
+; Test only two variants of fcmp. These get translated to f32 vcmp
; instructions anyway.
; CHECK-ALL-LABEL: test_fcmp_une:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-VFP: vcmpe.f32
+; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-ALL: movw{{ne|eq}}
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-VFP: vcmpe.f32
+; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-LIBCALL: movw{{ne|eq}}
; CHECK-LABEL: t1:
; CHECK: vldr [[S0:s[0-9]+]],
; CHECK: vldr [[S1:s[0-9]+]],
-; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vcmp.f32 [[S1]], [[S0]]
; CHECK: vmrs APSR_nzcv, fpscr
; CHECK: beq
%0 = load float, float* %a
; CHECK: bfc [[REG2]], #31, #1
; CHECK: cmp [[REG1]], #0
; CHECK: cmpeq [[REG2]], #0
-; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vcmp.f32
; CHECK-NOT: vmrs
; CHECK: bne
%0 = load double, double* %a
; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
; CHECK: tst [[REG3]], [[REG4]]
-; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vcmp.f32
; CHECK-NOT: vmrs
; CHECK: bne
%0 = load float, float* %a
define i32 @f2(float %a) {
;CHECK-LABEL: f2:
-;CHECK: vcmpe.f32
+;CHECK: vcmp.f32
;CHECK: moveq
entry:
%tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1]
define i32 @f6(float %a) {
;CHECK-LABEL: f6:
-;CHECK: vcmpe.f32
+;CHECK: vcmp.f32
;CHECK: movne
entry:
%tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1]
; CHECK-ARMv4: moveq r0, #42
; CHECK-ARMv7-LABEL: f7:
-; CHECK-ARMv7: vcmpe.f32
+; CHECK-ARMv7: vcmp.f32
; CHECK-ARMv7: vmrs APSR_nzcv, fpscr
; CHECK-ARMv7: movweq
; CHECK-ARMv7-NOT: vmrs
%tst1 = fcmp oeq float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f32 s0, s2, s3
ret void
}
%tst1 = fcmp oeq float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f64 d16, d1, d2
ret void
}
%tst1 = fcmp une float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f32 s0, s3, s2
ret void
}
%tst1 = fcmp une float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f64 d16, d2, d1
ret void
}
define i1 @cmp_f_oeq(float %a, float %b) {
; CHECK-LABEL: cmp_f_oeq:
; NONE: bl __aeabi_fcmpeq
-; HARD: vcmpe.f32
+; HARD: vcmp.f32
; HARD: moveq r0, #1
%1 = fcmp oeq float %a, %b
ret i1 %1
; CHECK-LABEL: cmp_f_one:
; NONE: bl __aeabi_fcmpgt
; NONE: bl __aeabi_fcmplt
-; HARD: vcmpe.f32
+; HARD: vcmp.f32
; HARD: movmi r0, #1
; HARD: movgt r0, #1
%1 = fcmp one float %a, %b
; CHECK-LABEL: cmp_f_ueq:
; NONE: bl __aeabi_fcmpeq
; NONE: bl __aeabi_fcmpun
-; HARD: vcmpe.f32
+; HARD: vcmp.f32
; HARD: moveq r0, #1
; HARD: movvs r0, #1
%1 = fcmp ueq float %a, %b
define i1 @cmp_f_une(float %a, float %b) {
; CHECK-LABEL: cmp_f_une:
; NONE: bl __aeabi_fcmpeq
-; HARD: vcmpe.f32
+; HARD: vcmp.f32
; HARD: movne r0, #1
%1 = fcmp une float %a, %b
ret i1 %1
; CHECK-LABEL: cmp_d_oeq:
; NONE: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpeq
-; DP: vcmpe.f64
+; DP: vcmp.f64
; DP: moveq r0, #1
%1 = fcmp oeq double %a, %b
ret i1 %1
; NONE: bl __aeabi_dcmplt
; SP: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmplt
-; DP: vcmpe.f64
+; DP: vcmp.f64
; DP: movmi r0, #1
; DP: movgt r0, #1
%1 = fcmp one double %a, %b
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpun
-; DP: vcmpe.f64
+; DP: vcmp.f64
; DP: moveq r0, #1
; DP: movvs r0, #1
%1 = fcmp ueq double %a, %b
; CHECK-LABEL: cmp_d_une:
; NONE: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpeq
-; DP: vcmpe.f64
+; DP: vcmp.f64
; DP: movne r0, #1
%1 = fcmp une double %a, %b
ret i1 %1