; CHECK-LABEL: bcast_unfold_cmp_v4f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB120_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm1
-; CHECK-NEXT: vcmpltps {{.*}}(%rip){1to4}, %xmm1, %k1
-; CHECK-NEXT: vblendmps %xmm1, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovups %xmm1, 4096(%rdi,%rax)
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm2
+; CHECK-NEXT: vcmpltps %xmm0, %xmm2, %k1
+; CHECK-NEXT: vblendmps %xmm2, %xmm1, %xmm2 {%k1}
+; CHECK-NEXT: vmovups %xmm2, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB120_1
; CHECK-NEXT: # %bb.2: # %bb10
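; A minimal IR sketch, reconstructed from the CHECK lines above rather than copied
; from the test source, of the kind of loop that yields the bcast_unfold_cmp_v4f32
; assembly: each <4 x float> chunk is compared against a splat of 2.0 and lanes that
; are not less than 2.0 are replaced with 3.0. The value names (%i, %p, %v, %c, %s)
; and the 1024-element trip count (matching the -4096 byte start index stepping by
; 16 bytes) are assumptions for illustration only.
define void @bcast_unfold_cmp_v4f32(ptr %arg) {
bb:
  br label %bb1

bb1:                                              ; preds = %bb1, %bb
  %i = phi i64 [ 0, %bb ], [ %i.next, %bb1 ]
  %p = getelementptr inbounds float, ptr %arg, i64 %i
  %v = load <4 x float>, ptr %p, align 4
  %c = fcmp olt <4 x float> %v, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %s = select <4 x i1> %c, <4 x float> %v, <4 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
  store <4 x float> %s, ptr %p, align 4
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10:                                             ; preds = %bb1
  ret void
}
; The v8f32 and v16f32 checks that follow correspond to the same pattern with
; <8 x float> and <16 x float> chunks and a per-iteration step of 8 or 16 elements.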
; CHECK-LABEL: bcast_unfold_cmp_v8f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB121_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm1
-; CHECK-NEXT: vcmpltps {{.*}}(%rip){1to8}, %ymm1, %k1
-; CHECK-NEXT: vblendmps %ymm1, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovups %ymm1, 4096(%rdi,%rax)
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm2
+; CHECK-NEXT: vcmpltps %ymm0, %ymm2, %k1
+; CHECK-NEXT: vblendmps %ymm2, %ymm1, %ymm2 {%k1}
+; CHECK-NEXT: vmovups %ymm2, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB121_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-LABEL: bcast_unfold_cmp_v16f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} zmm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB122_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm1
-; CHECK-NEXT: vcmpltps {{.*}}(%rip){1to16}, %zmm1, %k1
-; CHECK-NEXT: vblendmps %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovups %zmm1, 4096(%rdi,%rax)
+; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm2
+; CHECK-NEXT: vcmpltps %zmm0, %zmm2, %k1
+; CHECK-NEXT: vblendmps %zmm2, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT: vmovups %zmm2, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB122_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-LABEL: bcast_unfold_cmp_v4f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB124_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm1
-; CHECK-NEXT: vcmpltpd {{.*}}(%rip){1to4}, %ymm1, %k1
-; CHECK-NEXT: vblendmpd %ymm1, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovupd %ymm1, 8192(%rdi,%rax)
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm2
+; CHECK-NEXT: vcmpltpd %ymm0, %ymm2, %k1
+; CHECK-NEXT: vblendmpd %ymm2, %ymm1, %ymm2 {%k1}
+; CHECK-NEXT: vmovupd %ymm2, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB124_1
; CHECK-NEXT: # %bb.2: # %bb10
; CHECK-LABEL: bcast_unfold_cmp_v8f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB125_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm1
-; CHECK-NEXT: vcmpltpd {{.*}}(%rip){1to8}, %zmm1, %k1
-; CHECK-NEXT: vblendmpd %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovupd %zmm1, 8192(%rdi,%rax)
+; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm2
+; CHECK-NEXT: vcmpltpd %zmm0, %zmm2, %k1
+; CHECK-NEXT: vblendmpd %zmm2, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT: vmovupd %zmm2, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB125_1
; CHECK-NEXT: # %bb.2: # %bb10
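; For the f64 variants, an analogous sketch (again an assumed reconstruction from the
; CHECK lines, not the original test body) for bcast_unfold_cmp_v8f64: the loop walks
; 1024 doubles (the -8192 byte start index stepping by 64 bytes), compares each
; <8 x double> chunk against a splat of 2.0, and replaces non-matching lanes with 3.0.
define void @bcast_unfold_cmp_v8f64(ptr %arg) {
bb:
  br label %bb1

bb1:                                              ; preds = %bb1, %bb
  %i = phi i64 [ 0, %bb ], [ %i.next, %bb1 ]
  %p = getelementptr inbounds double, ptr %arg, i64 %i
  %v = load <8 x double>, ptr %p, align 8
  %c = fcmp olt <8 x double> %v, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
  %s = select <8 x i1> %c, <8 x double> %v, <8 x double> <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>
  store <8 x double> %s, ptr %p, align 8
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %bb10, label %bb1

bb10:                                             ; preds = %bb1
  ret void
}
; The v4f64 checks above correspond to the same pattern with <4 x double> chunks and
; a per-iteration step of 4 elements.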