In, DAG.getUNDEF(SVT)));
}
+/// Horizontal vector math instructions may be slower than normal math with
+/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
+/// implementation, and likely shuffle complexity of the alternate sequence.
+static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool HasFastHOps = Subtarget.hasFastHorizontalOps();
+ return !IsSingleSource || IsOptimizingSize || HasFastHOps;
+}
+
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
return true;
}
-/// Horizontal vector math instructions may be slower than normal math with
-/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
-/// implementation, and likely shuffle complexity of the alternate sequence.
-static bool shouldCombineToHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
- bool HasFastHOps = Subtarget.hasFastHorizontalOps();
- return !IsSingleSource || IsOptimizingSize || HasFastHOps;
-}
-
/// Do target-specific dag combines on floating-point adds/subs.
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
bool IsFadd = N->getOpcode() == ISD::FADD;
+ auto HorizOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode");
// Try to synthesize horizontal add/sub from adds/subs of shuffles.
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, IsFadd) &&
- shouldCombineToHorizontalOp(LHS == RHS, DAG, Subtarget)) {
- auto NewOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
- }
+ shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget))
+ return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
+
return SDValue();
}
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true) &&
- shouldCombineToHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false) &&
- shouldCombineToHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);