let Predicates = [HasAVX1Only] in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
- (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_xmm),
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
          (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
            (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
            (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
def : Pat<(v2i64 (X86VBroadcast i64:$src)),
- (VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44)>;
}
//===----------------------------------------------------------------------===//
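For illustration only (not part of the patch): a minimal IR reproducer for the v8f32 pattern above, with assumed function and value names. Compiled for an AVX1-only target (e.g. -mattr=+avx), this splat should now select vpermilps $0 followed by vinsertf128 $1 instead of vpshufd, i.e. an FP shuffle rather than an integer one.

define <8 x float> @splat_v8f32(float %x) {
entry:
  ; insertelement + zero shufflevector is the canonical splat idiom; it is
  ; lowered to X86VBroadcast, which the AVX1-only patterns above then select.
  %ins = insertelement <8 x float> undef, float %x, i32 0
  %splat = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}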
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: vmovq %rax, %xmm0
; X64-NEXT: movq %rax, (%rsi)
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
; X32-NEXT: movl (%ecx), %ecx
; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: movl %ecx, (%eax)
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: movl %eax, (%rsi)
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: vmovsd %xmm0, (%eax)
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64: ## BB#0: ## %entry
; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: vmovsd %xmm0, (%rsi)
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: vmovd %xmm0, (%eax)
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vmovss %xmm0, (%eax)
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: D3:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: vmovd %xmm0, (%rsi)
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vmovss %xmm0, (%rsi)
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: vmovd %xmm0, (%eax)
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vmovss %xmm0, (%eax)
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: e2:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: vmovd %xmm0, (%rsi)
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vmovss %xmm0, (%rsi)
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
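The hunk cuts the body of e2 short here. A plausible complete reproducer, consistent with the checks above (vmovss load and store, vpermilps splat, no vinsertf128, so a <4 x float> result), would look like the sketch below; the name and signature are assumptions, not taken from the test file.

define <4 x float> @e2_sketch(float* %ptr, float* %ptr2) {
entry:
  %q = load float, float* %ptr, align 4
  ; the extra store gives %q a second use, so it stays in a register and the
  ; broadcast is done from that register instead of being folded into a
  ; memory-operand broadcast
  store float %q, float* %ptr2, align 4
  %ins = insertelement <4 x float> undef, float %q, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}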