return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
+ // broadcast(bitcast(src)) -> bitcast(broadcast(src))
+ // 32-bit targets have to bitcast i64 to f64, so it is better to broadcast first and bitcast the result.
+ if (Src.getOpcode() == ISD::BITCAST &&
+ SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) {
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
+ VT.getVectorNumElements());
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
+ }
return SDValue();
}
case X86ISD::PSHUFD:
; X64-LABEL: A2:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: vmovq %rax, %xmm0
; X64-NEXT: movq %rax, (%rsi)
-; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %ecx
-; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
; X64-LABEL: B3:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: movl %eax, (%rsi)
+; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
; X32-LABEL: broadcast_mem_v4i16_v16i16:
; X32: ## %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: broadcast_mem_v4i16_v16i16:
; X32-AVX2-LABEL: test_2xi32_to_8xi32_mem:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0
; X32-AVX2-NEXT: retl
;
; X32-AVX512-LABEL: test_2xi32_to_8xi32_mem:
; X32-AVX512: # %bb.0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-AVX512-NEXT: vbroadcastsd (%eax), %ymm0
; X32-AVX512-NEXT: retl
;
; X64-AVX1-LABEL: test_2xi32_to_8xi32_mem:
; X32-AVX2-LABEL: test_2xi32_to_16xi32_mem:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0
; X32-AVX2-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX2-NEXT: retl
;