if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
+ unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
+
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
- auto VT = Op->getValueType(0);
+ EVT VT = Op->getValueType(0);
+ // Make sure the element size doesn't change.
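+ // peekThroughBitcasts may look through a bitcast that changes the element
+ // width (e.g. v2f64 <-> v4i32), and the sign-mask check below is performed
+ // per element at N's original scalar width, so bail out if the width differs.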
+ if (VT.getScalarSizeInBits() != ScalarSize)
+ return SDValue();
+
if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
// For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
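+ // E.g. fneg(shuffle(V, undef, <0,0,3,2>)) == shuffle(fneg(V), undef, <0,0,3,2>),
+ // since every output lane is either a lane of V (now negated) or undef.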
if (!SVOp->getOperand(1).isUndef())
return SDValue();
if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
- return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
- SVOp->getMask());
+ if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
+ return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+ SVOp->getMask());
return SDValue();
}
unsigned Opc = Op.getOpcode();
if (!InsVector.isUndef())
return SDValue();
if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
- NegInsVal, Op.getOperand(2));
+ if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+ NegInsVal, Op.getOperand(2));
return SDValue();
}
if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
return SDValue();
- SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
- if (!Op1.getValueType().isFloatingPoint())
- return SDValue();
-
- SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op0 = Op.getOperand(0);
// For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
// masks. For FSUB, we have to check if constant bits of Op0 are sign bit
// masks.
SmallVector<APInt, 16> EltBits;
// Extract constant bits and see if they are all sign bit masks. Ignore the
// undef elements.
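+ // A sign-bit-only constant has the bit pattern of -0.0 (0x80000000 for f32,
+ // 0x8000000000000000 for f64): X ^ SignMask flips only the sign bit, and
+ // FSUB(-0.0, X) computes -X, so either form acts as an fneg.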
- if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+ if (getTargetConstantBitsFromNode(Op1, ScalarSize,
UndefElts, EltBits,
/* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false)) {
if (!NegVal)
return SDValue();
+ // FIXME: Should we bitcast instead?
+ if (NegVal.getValueType() != VT)
+ return SDValue();
+
unsigned NewOpcode;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
; X86-LABEL: test_mm_mask_fmsub_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm2, %xmm2
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fmsub_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__W, i64 0
; X86-LABEL: test_mm_maskz_fmsub_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm2, %xmm2
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_fmsub_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__A, i64 0
; X86-LABEL: test_mm_mask3_fmsub_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm2, %xmm3
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm3, %xmm0, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; X86-NEXT: vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask3_fmsub_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm2, %xmm3
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm3, %xmm0, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; X64-NEXT: vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X64-NEXT: vmovapd %xmm2, %xmm0
; X64-NEXT: retq
entry:
; X86-LABEL: test_mm_mask_fnmadd_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fnmadd_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__W, i64 0
; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_fnmadd_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__A, i64 0
; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; X86-NEXT: vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask3_fnmadd_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; X64-NEXT: vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X64-NEXT: vmovapd %xmm2, %xmm0
; X64-NEXT: retq
entry: