--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s
+
+define void @f_f(<16 x double>* %ptr) {
+; CHECK-LABEL: f_f:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, (%rax)
+; CHECK-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
+; CHECK-NEXT: vmovapd 64(%rdi), %zmm2
+; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1
+; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovapd %zmm2, 64(%rdi)
+; CHECK-NEXT: vmovapd %zmm1, (%rdi)
+ store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* undef
+ %load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef
+ %v.i.i.i.i = load <16 x double>, <16 x double>* %ptr
+ %mask_vec_i1.i.i.i51.i.i = icmp ne <16 x i8> %load_mask8.i.i.i, zeroinitializer
+ %v1.i.i.i.i = select <16 x i1> %mask_vec_i1.i.i.i51.i.i, <16 x double> undef, <16 x double> %v.i.i.i.i
+ store <16 x double> %v1.i.i.i.i, <16 x double>* %ptr
+ unreachable
+}