ArrayRef<SDValue> Ops, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
+
if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
// If this broadcast/subv_broadcast is inserted into both halves, use a
// larger broadcast/subv_broadcast.
if (Op0.getOpcode() == X86ISD::VBROADCAST ||
    Op0.getOpcode() == X86ISD::SUBV_BROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
+ // concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
+ if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
+ (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
+ Op0.getOperand(0),
+ DAG.getIntPtrConstant(0, DL)));
+
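Reference note: both the fold above and the SCALAR_TO_VECTOR fold below gate their AVX1 path on MayFoldLoad, i.e. they only form a broadcast when the scalar can instead be folded as a memory operand. A sketch of that helper as it reads in X86ISelLowering.cpp around this change (quoted from memory, not part of this patch; verify against the tree):

static bool MayFoldLoad(SDValue Op) {
  // True for a one-use plain (unindexed, non-extending) load, which can be
  // folded into the broadcast instruction's memory operand.
  return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
}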
// concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
- if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR && Subtarget.hasAVX() &&
+ if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Subtarget.hasAVX2() ||
(VT.getScalarSizeInBits() >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
Op0.getOperand(0).getValueType() == VT.getScalarType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
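The new MOVDDUP fold targets IR of roughly the shape below. This is an illustrative sketch only: the function name and CHECK lines are hypothetical, not part of this patch's test changes. On AVX1 the fold fires only when the movddup operand is a foldable load (per the MayFoldLoad gate); with AVX2, register broadcasts are cheap enough to do it unconditionally.

; Hypothetical example: concat of identical movddup halves should now select
; a single 64-bit memory broadcast on AVX targets.
; AVX-LABEL: load_splat_movddup_4f64:
; AVX:       vbroadcastsd (%rdi), %ymm0
define <4 x double> @load_splat_movddup_4f64(double* %ptr) {
entry:
  %ld = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %dup = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %cat = shufflevector <2 x double> %dup, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %cat
}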
; SSE42-NEXT: movapd %xmm0, %xmm1
; SSE42-NEXT: retq
;
-; AVX1-LABEL: load_splat_8f32_4f32_01010101:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_8f32_4f32_01010101:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_8f32_4f32_01010101:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_8f32_4f32_01010101:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
%ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>